@fugood/llama.node 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/ggml/include/ggml.h

@@ -176,15 +176,15 @@
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#            define GGML_API __declspec(dllexport) extern
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define GGML_API __declspec(dllimport) extern
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define GGML_API __attribute__ ((visibility ("default"))) extern
 #    endif
 #else
-#    define GGML_API
+#    define GGML_API extern
 #endif

 // TODO: support for clang

@@ -217,7 +217,6 @@

 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
 #define GGML_MAX_N_THREADS      512
 #define GGML_MAX_OP_PARAMS      64

@@ -238,7 +237,9 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

-#define GGML_ROPE_TYPE_NEOX 2
+#define GGML_ROPE_TYPE_NEOX   2
+#define GGML_ROPE_TYPE_MROPE  8
+#define GGML_ROPE_TYPE_VISION 24

 #define GGUF_MAGIC "GGUF"

@@ -385,12 +386,15 @@ extern "C" {
         GGML_TYPE_F64      = 28,
         GGML_TYPE_IQ1_M    = 29,
         GGML_TYPE_BF16     = 30,
-        GGML_TYPE_Q4_0_4_4 = 31,
-        GGML_TYPE_Q4_0_4_8 = 32,
-        GGML_TYPE_Q4_0_8_8 = 33,
+        // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
+        // GGML_TYPE_Q4_0_4_8 = 32,
+        // GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0    = 34,
         GGML_TYPE_TQ2_0    = 35,
-        GGML_TYPE_COUNT,
+        // GGML_TYPE_IQ4_NL_4_4 = 36,
+        // GGML_TYPE_IQ4_NL_4_8 = 37,
+        // GGML_TYPE_IQ4_NL_8_8 = 38,
+        GGML_TYPE_COUNT   = 39,
     };

     // precision

@@ -431,9 +435,6 @@ extern "C" {
         GGML_FTYPE_MOSTLY_IQ4_XS   = 22, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ1_M    = 23, // except 1d tensors
         GGML_FTYPE_MOSTLY_BF16     = 24, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
     };

     // available tensor operations:

@@ -497,6 +498,7 @@ extern "C" {
         GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
+        GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,

@@ -510,7 +512,7 @@ extern "C" {
         GGML_OP_WIN_UNPART,
         GGML_OP_GET_REL_POS,
         GGML_OP_ADD_REL_POS,
-        GGML_OP_RWKV_WKV,
+        GGML_OP_RWKV_WKV6,

         GGML_OP_UNARY,

@@ -559,10 +561,10 @@ extern "C" {

     enum ggml_log_level {
         GGML_LOG_LEVEL_NONE  = 0,
-        GGML_LOG_LEVEL_INFO  = 1,
-        GGML_LOG_LEVEL_WARN  = 2,
-        GGML_LOG_LEVEL_ERROR = 3,
-        GGML_LOG_LEVEL_DEBUG = 4,
+        GGML_LOG_LEVEL_DEBUG = 1,
+        GGML_LOG_LEVEL_INFO  = 2,
+        GGML_LOG_LEVEL_WARN  = 3,
+        GGML_LOG_LEVEL_ERROR = 4,
         GGML_LOG_LEVEL_CONT  = 5, // continue previous log
     };

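Note: the hunk above changes the numeric values of the ggml log levels (GGML_LOG_LEVEL_DEBUG is now 1 and GGML_LOG_LEVEL_ERROR is now 4), so host code that filters by comparing against these constants behaves differently once rebuilt against the new header. A minimal, hedged sketch of a filtering callback registered through ggml_log_set (the ggml_log_callback typedef and ggml_log_set appear unchanged later in this diff; the threshold chosen here is only an example):

    #include <stdio.h>
    #include "ggml.h"

    // Forward everything at or above WARN to stderr; with the new numbering
    // DEBUG == 1 and WARN == 3, so debug/info chatter is dropped.
    static void quiet_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
        (void) user_data;
        if (level >= GGML_LOG_LEVEL_WARN) { // GGML_LOG_LEVEL_CONT (5) also passes, keeping continuations
            fputs(text, stderr);
        }
    }

    int main(void) {
        ggml_log_set(quiet_log_cb, NULL);
        // ... use ggml as usual ...
        return 0;
    }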
@@ -574,6 +576,13 @@ extern "C" {
         GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };

+    struct ggml_init_params {
+        // memory pool
+        size_t mem_size;   // bytes
+        void * mem_buffer; // if NULL, memory will be allocated internally
+        bool   no_alloc;   // don't allocate memory for the tensor data
+    };
+
     // n-dimensional tensor
     struct ggml_tensor {
         enum ggml_type type;

@@ -596,7 +605,6 @@ extern "C" {

         int32_t flags;

-        struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];

         // source tensor and offset for views

@@ -609,7 +617,7 @@ extern "C" {

         void * extra; // extra things e.g. for ggml-cuda.cu

-        // char padding[4];
+        char padding[8];
     };

     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);

@@ -619,66 +627,6 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*ggml_abort_callback)(void * data);

-    // Scheduling priorities
-    enum ggml_sched_priority {
-        GGML_SCHED_PRIO_NORMAL,
-        GGML_SCHED_PRIO_MEDIUM,
-        GGML_SCHED_PRIO_HIGH,
-        GGML_SCHED_PRIO_REALTIME
-    };
-
-    // Threadpool params
-    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
-    struct ggml_threadpool_params {
-        bool                cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
-        int                 n_threads;                   // number of threads
-        enum ggml_sched_priority prio;                   // thread priority
-        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
-        bool                strict_cpu;                  // strict cpu placement
-        bool                paused;                      // start in paused state
-    };
-
-    struct ggml_threadpool;     // forward declaration, see ggml.c
-
-    typedef struct ggml_threadpool * ggml_threadpool_t;
-
-    // the compute plan that needs to be prepared for ggml_graph_compute()
-    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
-        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
-        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
-
-        int n_threads;
-        struct ggml_threadpool * threadpool;
-
-        // abort ggml_graph_compute when true
-        ggml_abort_callback abort_callback;
-        void *              abort_callback_data;
-    };
-
-    // scratch buffer
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
-    struct ggml_init_params {
-        // memory pool
-        size_t mem_size;   // bytes
-        void * mem_buffer; // if NULL, memory will be allocated internally
-        bool   no_alloc;   // don't allocate memory for the tensor data
-    };
-
-    // numa strategies
-    enum ggml_numa_strategy {
-        GGML_NUMA_STRATEGY_DISABLED   = 0,
-        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
-        GGML_NUMA_STRATEGY_ISOLATE    = 2,
-        GGML_NUMA_STRATEGY_NUMACTL    = 3,
-        GGML_NUMA_STRATEGY_MIRROR     = 4,
-        GGML_NUMA_STRATEGY_COUNT
-    };

     //
     // GUID

@@ -701,9 +649,6 @@ extern "C" {
     // accepts a UTF-8 path, even on Windows
     GGML_API FILE * ggml_fopen(const char * fname, const char * mode);

-    GGML_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
-    GGML_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node
-
     GGML_API void    ggml_print_object (const struct ggml_object * obj);
     GGML_API void    ggml_print_objects(const struct ggml_context * ctx);

@@ -760,12 +705,12 @@ extern "C" {

     // main

-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
+    GGML_API void                  ggml_reset(struct ggml_context * ctx);
+    GGML_API void                  ggml_free (struct ggml_context * ctx);

     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);

-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);

@@ -805,8 +750,7 @@ extern "C" {
             int64_t ne2,
             int64_t ne3);

-    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+    GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);

     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
     GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);

@@ -816,35 +760,25 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);

-    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
-    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
-
     // Converts a flat index into coordinates
-    GGML_API void    ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
-
-    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+    GGML_API void    ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);

-    GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
-
-    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
-
-    GGML_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);

     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

-    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
-
     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
     GGML_ATTRIBUTE_FORMAT(2, 3)
     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);

+    // Tensor flags
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
+
     //
     // operations on tensors with backpropagation
     //
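For consumers of the header, the practical effect of the hunks above is that struct ggml_init_params now lives next to struct ggml_tensor, a context can be rewound with the new ggml_reset() instead of being freed and re-created, and the scratch-buffer and scalar get/set helpers are gone. A minimal sketch of the lifecycle using declarations visible in this diff (ggml_new_tensor_1d is unchanged in this release; the pool size is an arbitrary example value):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * 1024 * 1024, // 16 MiB pool, arbitrary for this sketch
            /*.mem_buffer =*/ NULL,             // let ggml allocate the pool
            /*.no_alloc   =*/ false,
        };

        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
        ggml_set_name(t, "example");

        ggml_reset(ctx); // new in this version: rewind the pool instead of free + init
        ggml_free(ctx);
        return 0;
    }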
@@ -1511,6 +1445,22 @@ extern "C" {
             float                 beta_fast,
             float                 beta_slow);

+    GGML_API struct ggml_tensor * ggml_rope_multi(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
+            int                   n_dims,
+            int                   sections[4],
+            int                   mode,
+            int                   n_ctx_orig,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow);
+
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
             struct ggml_context * ctx,

@@ -1558,7 +1508,7 @@ extern "C" {
             "use ggml_rope_ext_inplace instead");

     // compute correction dims for YaRN RoPE scaling
-    void ggml_rope_yarn_corr_dims(
+    GGML_API void ggml_rope_yarn_corr_dims(
         int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);

     // rotary position embedding backward, i.e compute dx from dy

@@ -1614,17 +1564,6 @@ extern "C" {
             int                   d1, // dilation dimension 1
             bool                  is_2D);

-    GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,  // convolution kernel
-            struct ggml_tensor  * b,  // data
-            int                   s0, // stride dimension 0
-            int                   s1, // stride dimension 1
-            int                   p0, // padding dimension 0
-            int                   p1, // padding dimension 1
-            int                   d0, // dilation dimension 0
-            int                   d1); // dilation dimension 1
-
     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,   // convolution kernel

@@ -1642,6 +1581,23 @@ extern "C" {
             int                   s,  // stride
             int                   d); // dilation

+    // depthwise
+    // TODO: this is very likely wrong for some cases! - needs more testing
+    GGML_API struct ggml_tensor * ggml_conv_1d_dw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation
+
+    GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   d0); // dilation
+
     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,   // convolution kernel

@@ -1661,7 +1617,6 @@ extern "C" {
             int                   d0, // dilation dimension 0
             int                   d1); // dilation dimension 1

-
     // kernel size is a->ne[0] x a->ne[1]
     // stride is equal to kernel size
     // padding is zero

@@ -1688,6 +1643,18 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    // depthwise
+    GGML_API struct ggml_tensor * ggml_conv_2d_dw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,

@@ -1761,6 +1728,13 @@ extern "C" {
             int                  p2,
             int                  p3);

+    // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
+    GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   p0,
+            int                   p1);
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]

@@ -1814,6 +1788,9 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_prec        prec);

+    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
+            const struct ggml_tensor * a);
+
     // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
             struct ggml_context * ctx,

@@ -1887,7 +1864,7 @@ extern "C" {
             struct ggml_tensor  * pw,
             struct ggml_tensor  * ph);

-    GGML_API struct ggml_tensor * ggml_rwkv_wkv(
+    GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
             struct ggml_context * ctx,
             struct ggml_tensor  * k,
             struct ggml_tensor  * v,
@@ -2050,31 +2027,20 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * grad,
-            float                 alpha,
-            float                 beta1,
-            float                 beta2,
-            float                 eps,
-            float                 wd); // weight decay
+            struct ggml_tensor  * m,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * adamw_params); // parameters such a the learning rate

     //
     // automatic differentiation
     //

-    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate);
-
-    GGML_API void ggml_build_opt_adamw(
-            struct ggml_context * ctx,
-            struct ggml_cgraph  * gf,
-            struct ggml_cgraph  * gb,
-            float                 alpha,
-            float                 beta1,
-            float                 beta2,
-            float                 eps,
-            float                 wd); // weight decay
+    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(
+        struct ggml_context * ctx_static, // context for static gradients (loss + gradient accumulation)
+        struct ggml_context * ctx_compute, // context for gradient computation
+        struct ggml_cgraph * cgraph,
+        bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static

     // graph allocation in a context
     GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false

@@ -2094,28 +2060,9 @@ extern "C" {
     GGML_API size_t ggml_graph_overhead(void);
     GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

-    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
-    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
-    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
-    GGML_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params * params);
-    GGML_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
-    GGML_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
-    GGML_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
-    GGML_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
-
-    // ggml_graph_plan() has to be called before ggml_graph_compute()
-    // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan(
-                  const struct ggml_cgraph * cgraph,
-                        int                  n_threads, /* = GGML_DEFAULT_N_THREADS */
-                  struct ggml_threadpool   * threadpool /* = NULL */ );
-    GGML_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-
-    // same as ggml_graph_compute() but the work data is allocated as a part of the context
-    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
-
-    GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (const struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);

     GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
     GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);

@@ -2126,201 +2073,14 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);

-    // build gradient checkpointing backward graph gb for gf using provided checkpoints
-    // gb_tmp will contain original backward graph with rewritten backward process nodes,
-    // but without the second forward pass nodes.
-    GGML_API void ggml_build_backward_gradient_checkpointing(
-            struct ggml_context   * ctx,
-            struct ggml_cgraph    * gf,
-            struct ggml_cgraph    * gb,
-            struct ggml_cgraph    * gb_tmp,
-            struct ggml_tensor  * * checkpoints,
-            int                     n_checkpoints);
-    //
-    // optimization
-    //
-
-    // optimization methods
-    enum ggml_opt_type {
-        GGML_OPT_TYPE_ADAM,
-        GGML_OPT_TYPE_LBFGS,
-    };
-
-    // linesearch methods
-    enum ggml_linesearch {
-        GGML_LINESEARCH_DEFAULT = 1,
-
-        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
-        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
-        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
-    };
-
-    // optimization return values
-    enum ggml_opt_result {
-        GGML_OPT_RESULT_OK = 0,
-        GGML_OPT_RESULT_DID_NOT_CONVERGE,
-        GGML_OPT_RESULT_NO_CONTEXT,
-        GGML_OPT_RESULT_INVALID_WOLFE,
-        GGML_OPT_RESULT_FAIL,
-        GGML_OPT_RESULT_CANCEL,
-
-        GGML_LINESEARCH_FAIL = -128,
-        GGML_LINESEARCH_MINIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-        GGML_LINESEARCH_INVALID_PARAMETERS,
-    };
-
-    typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
+    // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
     typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);

     // Set callback for all future logging events.
     // If this is not called, or NULL is supplied, everything is output on stderr.
     GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);

-    //
-    // see ggml.c (ggml_opt_default_params) for default values
-    //
-    struct ggml_opt_params {
-        enum ggml_opt_type type;
-
-        size_t graph_size;
-
-        int n_threads;
-
-        // delta-based convergence test
-        //
-        // if past == 0 - disabled
-        // if past > 0:
-        //   stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
-        //
-        int past;
-        float delta;
-
-        // maximum number of iterations without improvement
-        //
-        // if 0 - disabled
-        // if > 0:
-        //   assume convergence if no cost improvement in this number of iterations
-        //
-        int max_no_improvement;
-
-        bool print_forward_graph;
-        bool print_backward_graph;
-
-        int n_gradient_accumulation;
-
-        // ADAM parameters
-        struct {
-            int n_iter;
-
-            float sched; // schedule multiplier (fixed, decay or warmup)
-            float decay; // weight decay for AdamW, use 0.0f to disable
-            int   decay_min_ndim; // minimum number of tensor dimension to apply weight decay
-            float alpha; // learning rate
-            float beta1;
-            float beta2;
-            float eps;   // epsilon for numerical stability
-            float eps_f; // epsilon for convergence test
-            float eps_g; // epsilon for convergence test
-            float gclip; // gradient clipping
-        } adam;
-
-        // LBFGS parameters
-        struct {
-            int m; // number of corrections to approximate the inv. Hessian
-            int n_iter;
-            int max_linesearch;
-
-            float eps;      // convergence tolerance
-            float ftol;     // line search tolerance
-            float wolfe;
-            float min_step;
-            float max_step;
-
-            enum ggml_linesearch linesearch;
-        } lbfgs;
-    };
-
-    struct ggml_opt_context {
-        struct ggml_context * ctx;
-        struct ggml_opt_params params;
-
-        int iter;
-        int64_t nx; // number of parameter elements
-
-        bool just_initialized;
-
-        float loss_before;
-        float loss_after;
-
-        struct {
-            struct ggml_tensor * g;  // current gradient
-            struct ggml_tensor * m;  // first moment
-            struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * pf; // past function values
-            float fx_best;
-            float fx_prev;
-            int n_no_improvement;
-        } adam;
-
-        struct {
-            struct ggml_tensor * x;    // current parameters
-            struct ggml_tensor * xp;   // previous parameters
-            struct ggml_tensor * g;    // current gradient
-            struct ggml_tensor * gp;   // previous gradient
-            struct ggml_tensor * d;    // search direction
-            struct ggml_tensor * pf;   // past function values
-            struct ggml_tensor * lmal; // the L-BFGS memory alpha
-            struct ggml_tensor * lmys; // the L-BFGS memory ys
-            struct ggml_tensor * lms;  // the L-BFGS memory s
-            struct ggml_tensor * lmy;  // the L-BFGS memory y
-            float fx_best;
-            float step;
-            int j;
-            int k;
-            int end;
-            int n_no_improvement;
-        } lbfgs;
-    };
-
-    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
-
-    // optimize the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt(
-            struct ggml_context * ctx,
-            struct ggml_opt_params params,
-            struct ggml_tensor * f);
-
-    // initialize optimizer context
-    GGML_API void ggml_opt_init(
-            struct ggml_context     * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_opt_params    params,
-            int64_t                   nx);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume_g(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f,
-            struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb,
-            ggml_opt_callback callback,
-            void * callback_data);
-
-    //
-    // tensor flags
-    //
-    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
-    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);

     //
     // quantization
@@ -2477,83 +2237,65 @@ extern "C" {
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);

-    //
-    // system info
-    //
-
-    GGML_API int ggml_cpu_has_avx        (void);
-    GGML_API int ggml_cpu_has_avx_vnni   (void);
-    GGML_API int ggml_cpu_has_avx2       (void);
-    GGML_API int ggml_cpu_has_avx512     (void);
-    GGML_API int ggml_cpu_has_avx512_vbmi(void);
-    GGML_API int ggml_cpu_has_avx512_vnni(void);
-    GGML_API int ggml_cpu_has_avx512_bf16(void);
-    GGML_API int ggml_cpu_has_amx_int8   (void);
-    GGML_API int ggml_cpu_has_fma        (void);
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_sve        (void);
-    GGML_API int ggml_cpu_has_arm_fma    (void);
-    GGML_API int ggml_cpu_has_metal      (void);
-    GGML_API int ggml_cpu_has_f16c       (void);
-    GGML_API int ggml_cpu_has_fp16_va    (void);
-    GGML_API int ggml_cpu_has_wasm_simd  (void);
-    GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_vulkan     (void);
-    GGML_API int ggml_cpu_has_kompute    (void);
-    GGML_API int ggml_cpu_has_gpublas    (void);
-    GGML_API int ggml_cpu_has_sse3       (void);
-    GGML_API int ggml_cpu_has_ssse3      (void);
-    GGML_API int ggml_cpu_has_riscv_v    (void);
-    GGML_API int ggml_cpu_has_sycl       (void);
-    GGML_API int ggml_cpu_has_rpc        (void);
-    GGML_API int ggml_cpu_has_vsx        (void);
-    GGML_API int ggml_cpu_has_matmul_int8(void);
-    GGML_API int ggml_cpu_has_cann       (void);
-    GGML_API int ggml_cpu_has_llamafile  (void);
-
-    // get the sve vector length in bytes
-    GGML_API int ggml_cpu_get_sve_cnt(void);
-
-    //
-    // Internal types and functions exposed for tests and benchmarks
-    //
-
-#ifdef  __cplusplus
-// restrict not standard in C++
-#define GGML_RESTRICT
+#ifdef __cplusplus
+    // restrict not standard in C++
+#    if defined(__GNUC__)
+#        define GGML_RESTRICT __restrict__
+#    elif defined(__clang__)
+#        define GGML_RESTRICT __restrict
+#    elif defined(_MSC_VER)
+#        define GGML_RESTRICT __restrict
+#    else
+#        define GGML_RESTRICT
+#    endif
 #else
-#define GGML_RESTRICT restrict
+#    define GGML_RESTRICT restrict
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
-    typedef void (*ggml_from_float_to_mat_t)
-                                     (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
-    typedef void (*ggml_vec_dot_t)   (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
-                                      const void * GGML_RESTRICT y, size_t by, int nrc);
-    typedef void (*ggml_gemv_t)      (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                      const void * GGML_RESTRICT y, int nr, int nc);
-    typedef void (*ggml_gemm_t)      (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                      const void * GGML_RESTRICT y, int nr, int nc);
-
-    typedef struct {
+
+    struct ggml_type_traits {
         const char             * type_name;
         int64_t                  blck_size;
         int64_t                  blck_size_interleave; // interleave elements in blocks
         size_t                   type_size;
         bool                     is_quantized;
         ggml_to_float_t          to_float;
-        ggml_from_float_t        from_float;
         ggml_from_float_t        from_float_ref;
-        ggml_from_float_to_mat_t from_float_to_mat;
-        ggml_vec_dot_t           vec_dot;
-        enum ggml_type           vec_dot_type;
-        int64_t                  nrows; // number of rows to process simultaneously
-        int64_t                  ncols; // number of columns to process simultaneously
-        ggml_gemv_t              gemv;
-        ggml_gemm_t              gemm;
-    } ggml_type_traits_t;
-
-    GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
+    };
+
+    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
+
+    // ggml threadpool
+    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
+    // the goal should be to create an API that other backends can use move everything to the ggml base
+
+    // scheduling priorities
+    enum ggml_sched_priority {
+        GGML_SCHED_PRIO_NORMAL,
+        GGML_SCHED_PRIO_MEDIUM,
+        GGML_SCHED_PRIO_HIGH,
+        GGML_SCHED_PRIO_REALTIME
+    };
+
+    // threadpool params
+    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
+    struct ggml_threadpool_params {
+        bool                cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int                 n_threads;                   // number of threads
+        enum ggml_sched_priority prio;                   // thread priority
+        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
+        bool                strict_cpu;                  // strict cpu placement
+        bool                paused;                      // start in paused state
+    };
+
+    struct ggml_threadpool;     // forward declaration, see ggml.c
+
+    typedef struct ggml_threadpool * ggml_threadpool_t;
+
+    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
+    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);

 #ifdef  __cplusplus
 }