@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
```diff
--- a/package/src/llama.cpp/ggml/src/ggml.c
+++ b/package/src/llama.cpp/ggml/src/ggml.c
@@ -8,7 +8,10 @@
 
 // FIXME: required here for quantization functions
 #include "ggml-quants.h"
-
+
+#ifdef GGML_USE_CPU_HBM
+#include <hbwmalloc.h>
+#endif
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
@@ -788,32 +791,23 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
         .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
     },
-    [
-        .type_name                = "
-        .blck_size                =
-        .
-        .
-        .is_quantized             = true,
-        .to_float                 = NULL,
-        .from_float_ref           = NULL,
+    [31] = { // GGML_TYPE_Q4_0_4_4
+        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
     },
-    [
-        .type_name                = "
-        .blck_size                =
-        .
-        .
-        .is_quantized             = true,
-        .to_float                 = NULL,
-        .from_float_ref           = NULL,
+    [32] = { // GGML_TYPE_Q4_0_4_8
+        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
     },
-    [
-        .type_name                = "
-        .blck_size                =
-        .
-        .
-        .is_quantized             = true,
-        .to_float                 = NULL,
-        .from_float_ref           = NULL,
+    [33] = { // GGML_TYPE_Q4_0_8_8
+        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
     },
     [GGML_TYPE_TQ1_0] = {
         .type_name                = "tq1_0",
@@ -831,6 +825,24 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
         .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
     },
+    [36] = { // GGML_TYPE_IQ4_NL_4_4
+        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
+    },
+    [37] = { // GGML_TYPE_IQ4_NL_4_8
+        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
+    },
+    [38] = { // GGML_TYPE_IQ4_NL_8_8
+        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
+        .blck_size                = 0,
+        .type_size                = 0,
+        .is_quantized             = false,
+    },
 };
 
 const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
```
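The removed Q4_0_4_4/Q4_0_4_8/Q4_0_8_8 and IQ4_NL_N_M entries are kept as numbered placeholders so their enum slots stay reserved; their block size is set to 0, which is exactly how the updated GGUF loader further down in this diff rejects models that still use them. A minimal caller-side sketch of the same check, assuming only the public `ggml.h` API (`ggml_blck_size`, `ggml_type_name`); the helper name is illustrative:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: a removed/placeholder tensor type reports a block size of 0, the
// same condition the updated GGUF loader in this diff checks before bailing.
static int is_removed_ggml_type(enum ggml_type t) {
    if (ggml_blck_size(t) == 0) {
        fprintf(stderr, "type %d (%s) is no longer supported; requantize to Q4_0 / IQ4_NL "
                        "and rely on runtime repacking\n", (int) t, ggml_type_name(t));
        return 1;
    }
    return 0;
}
```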
```diff
@@ -941,6 +953,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "POOL_2D_BACK",
     "UPSCALE",
     "PAD",
+    "PAD_REFLECT_1D",
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
@@ -974,7 +987,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1036,6 +1049,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "pool_2d_back(x)",
     "upscale(x)",
     "pad(x)",
+    "pad_reflect_1d(x)",
     "arange(start, stop, step)",
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
@@ -1069,7 +1083,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -1259,9 +1273,6 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;  break;
         case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;   break;
         case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;   break;
-        case GGML_FTYPE_MOSTLY_Q4_0_4_4:      wtype = GGML_TYPE_Q4_0_4_4; break;
-        case GGML_FTYPE_MOSTLY_Q4_0_4_8:      wtype = GGML_TYPE_Q4_0_4_8; break;
-        case GGML_FTYPE_MOSTLY_Q4_0_8_8:      wtype = GGML_TYPE_Q4_0_8_8; break;
         case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
     }
@@ -2255,6 +2266,7 @@ struct ggml_tensor * ggml_argmax(
         struct ggml_context * ctx,
         struct ggml_tensor  * a) {
     GGML_ASSERT(ggml_is_matrix(a));
+    GGML_ASSERT(a->ne[0] <= INT32_MAX);
 
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
 
@@ -3505,15 +3517,18 @@ static struct ggml_tensor * ggml_rope_impl(
         GGML_ASSERT(c->ne[0] >= n_dims / 2);
     }
 
+    int sections[4] = {0, 0, 0, 0};
+
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[
+    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
     memcpy(params +  5, &freq_base,    sizeof(float));
     memcpy(params +  6, &freq_scale,   sizeof(float));
     memcpy(params +  7, &ext_factor,   sizeof(float));
     memcpy(params +  8, &attn_factor,  sizeof(float));
     memcpy(params +  9, &beta_fast,    sizeof(float));
     memcpy(params + 10, &beta_slow,    sizeof(float));
+    memcpy(params + 11, &sections, sizeof(int)*4);
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
@@ -3535,6 +3550,53 @@ struct ggml_tensor * ggml_rope(
     );
 }
 
+struct ggml_tensor * ggml_rope_multi(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        struct ggml_tensor  * c,
+        int                   n_dims,
+        int                   sections[4],
+        int                   mode,
+        int                   n_ctx_orig,
+        float                 freq_base,
+        float                 freq_scale,
+        float                 ext_factor,
+        float                 attn_factor,
+        float                 beta_fast,
+        float                 beta_slow) {
+    // Multimodal Rotary Position Embedding
+    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
+
+    GGML_ASSERT(ggml_is_vector(b));
+    GGML_ASSERT(b->type == GGML_TYPE_I32);
+    GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
+
+    if (c) {
+        GGML_ASSERT(c->type == GGML_TYPE_F32);
+        GGML_ASSERT(c->ne[0] >= n_dims / 2);
+    }
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
+    memcpy(params +  5, &freq_base,    sizeof(float));
+    memcpy(params +  6, &freq_scale,   sizeof(float));
+    memcpy(params +  7, &ext_factor,   sizeof(float));
+    memcpy(params +  8, &attn_factor,  sizeof(float));
+    memcpy(params +  9, &beta_fast,    sizeof(float));
+    memcpy(params + 10, &beta_slow,    sizeof(float));
+    memcpy(&params[11], sections, sizeof(int)*4);
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_ROPE;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;
+
+    return result;
+}
+
 struct ggml_tensor * ggml_rope_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
```
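`ggml_rope_multi` is the new multimodal RoPE entry point behind the Qwen2-VL support this release pulls in (see the new `examples/llava/qwen2vl-cli.cpp` in the file list). A minimal caller-side sketch, assuming the `GGML_ROPE_TYPE_MROPE` mode flag from this release's `ggml.h`; the helper name, dimensions, and section split are illustrative, not values taken from this diff:

```c
#include "ggml.h"

// Sketch: apply multimodal RoPE to a query tensor.
// q   : [n_dims, n_head, n_tokens]      (float)
// pos : [4 * n_tokens], GGML_TYPE_I32   (4 position ids per token, matching
//                                        the assert in ggml_rope_multi)
static struct ggml_tensor * apply_mrope(struct ggml_context * ctx,
                                        struct ggml_tensor  * q,
                                        struct ggml_tensor  * pos) {
    int sections[4] = {16, 24, 24, 0}; // illustrative time/height/width/extra split
    return ggml_rope_multi(ctx, q, pos, /*freq factors*/ NULL,
                           /*n_dims*/      128,
                           sections,
                           /*mode*/        GGML_ROPE_TYPE_MROPE,
                           /*n_ctx_orig*/  0,
                           /*freq_base*/   1000000.0f,
                           /*freq_scale*/  1.0f,
                           /*ext_factor*/  0.0f,
                           /*attn_factor*/ 1.0f,
                           /*beta_fast*/   32.0f,
                           /*beta_slow*/   1.0f);
}
```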
```diff
@@ -3698,13 +3760,84 @@ struct ggml_tensor * ggml_clamp(
     return result;
 }
 
-// ggml_conv_1d
-
 static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
     return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
 }
 
-
+// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+// a: [OC,IC, KH, KW]
+// b: [N, IC, IH, IW]
+// result: [N, OH, OW, IC*KH*KW]
+struct ggml_tensor * ggml_im2col(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1,
+        bool                  is_2D,
+        enum ggml_type        dst_type) {
+    if (is_2D) {
+        GGML_ASSERT(a->ne[2] == b->ne[2]);
+    } else {
+        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
+        GGML_ASSERT(b->ne[1] == a->ne[1]);
+        GGML_ASSERT(b->ne[3] == 1);
+    }
+
+    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
+    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+
+    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0)           && "b too small compared to a");
+
+    const int64_t ne[4] = {
+        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
+        OW,
+        is_2D ? OH : b->ne[2],
+        is_2D ? b->ne[3] : 1,
+    };
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_IM2COL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_im2col_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t             * ne,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1,
+        bool                  is_2D) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_IM2COL_BACK;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
+// ggml_conv_1d
+
+struct ggml_tensor * ggml_conv_1d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * b,
@@ -3734,137 +3867,75 @@ struct ggml_tensor* ggml_conv_1d_ph(
     return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
 }
 
-//
-
-static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
-    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
-}
+// ggml_conv_1d_dw
 
-
+struct ggml_tensor * ggml_conv_1d_dw(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * b,
         int                   s0,
         int                   p0,
         int                   d0) {
-
-
-    GGML_ASSERT(a->ne[3] == 1);
+    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
+    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
 
-
-    GGML_ASSERT(d0 == 1);
+    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
 
-
-        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
-        a->ne[1], b->ne[2], 1,
-    };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
 
-
-    ggml_set_op_params(result, params, sizeof(params));
-
-    result->op = GGML_OP_CONV_TRANSPOSE_1D;
-    result->src[0] = a;
-    result->src[1] = b;
+    result = ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
 
     return result;
 }
 
-//
+// ggml_conv_1d_dw_ph
 
-struct ggml_tensor *
+struct ggml_tensor * ggml_conv_1d_dw_ph(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * b,
         int                   s0,
-        int
-
-
-        int                   d0,
-        int                   d1) {
-    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
-    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
-                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
-                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
-    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+        int                   d0) {
+    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
+}
 
-
-    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
-    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+// ggml_conv_transpose_1d
 
-
+static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
+    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
 }
-// ggml_conv_2d
 
-
-// a: [OC,IC, KH, KW]
-// b: [N, IC, IH, IW]
-// result: [N, OH, OW, IC*KH*KW]
-struct ggml_tensor * ggml_im2col(
+GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * b,
         int                   s0,
-        int                   s1,
         int                   p0,
-        int
-
-
-
-        enum ggml_type        dst_type) {
-    if(is_2D) {
-        GGML_ASSERT(a->ne[2] == b->ne[2]);
-    } else {
-        GGML_ASSERT(a->ne[1] == b->ne[1]);
-        GGML_ASSERT(b->ne[3] == 1);
-    }
-
-    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
-    const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+        int                   d0) {
+    GGML_ASSERT(ggml_is_matrix(b));
+    GGML_ASSERT(a->ne[2] == b->ne[1]);
+    GGML_ASSERT(a->ne[3] == 1);
 
-    GGML_ASSERT(
-    GGML_ASSERT(
+    GGML_ASSERT(p0 == 0);
+    GGML_ASSERT(d0 == 1);
 
     const int64_t ne[4] = {
-
-
-        is_2D ? OH : b->ne[2],
-        is_2D ? b->ne[3] : 1,
+        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
+        a->ne[1], b->ne[2], 1,
     };
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    int32_t params[] = { s0, p0, d0 };
     ggml_set_op_params(result, params, sizeof(params));
 
-    result->op =
+    result->op = GGML_OP_CONV_TRANSPOSE_1D;
     result->src[0] = a;
     result->src[1] = b;
 
     return result;
 }
 
-
-        struct ggml_context * ctx,
-        struct ggml_tensor  * a,
-        struct ggml_tensor  * b,
-        int64_t             * ne,
-        int                   s0,
-        int                   s1,
-        int                   p0,
-        int                   p1,
-        int                   d0,
-        int                   d1,
-        bool                  is_2D) {
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
-    ggml_set_op_params(result, params, sizeof(params));
-
-    result->op = GGML_OP_IM2COL_BACK;
-    result->src[0] = a;
-    result->src[1] = b;
-
-    return result;
-}
+// ggml_conv_2d
 
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
@@ -3911,6 +3982,31 @@ struct ggml_tensor * ggml_conv_2d_s1_ph(
     return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
 }
 
+// ggml_conv_2d_dw
+
+struct ggml_tensor * ggml_conv_2d_dw(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1) {
+    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
+    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
+                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
+                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
+    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+
+    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
+    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+
+    return result;
+}
+
 // ggml_conv_transpose_2d_p0
 
 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
```
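A usage sketch for the depthwise helper above. The parameter names follow the new signature; the kernel and input layouts in the comments are an assumption based on the reshape pattern in the body (channels folded into the trailing dims of `a`), not something spelled out elsewhere in this diff:

```c
#include "ggml.h"

// Sketch: depthwise 3x3 convolution, stride 1, padding 1, dilation 1.
// kernel : per-channel filters, assumed ne = [3, 3, 1, C]
// input  : feature map,         assumed ne = [W, H, C, 1]
static struct ggml_tensor * dwconv3x3(struct ggml_context * ctx,
                                      struct ggml_tensor  * kernel,
                                      struct ggml_tensor  * input) {
    return ggml_conv_2d_dw(ctx, kernel, input,
                           /*s0*/ 1, /*s1*/ 1,
                           /*p0*/ 1, /*p1*/ 1,
                           /*d0*/ 1, /*d1*/ 1);
}
```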
```diff
@@ -4087,6 +4183,37 @@ struct ggml_tensor * ggml_pad(
     return result;
 }
 
+// ggml_pad_reflect_1d
+
+struct ggml_tensor * ggml_pad_reflect_1d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   p0,
+        int                   p1) {
+    GGML_ASSERT(p0 >= 0);
+    GGML_ASSERT(p1 >= 0);
+
+    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
+    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
+
+    GGML_ASSERT(ggml_is_contiguous(a));
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
+            a->ne[0] + p0 + p1,
+            a->ne[1],
+            a->ne[2],
+            a->ne[3]);
+
+    int32_t params[] = { p0, p1 };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_PAD_REFLECT_1D;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_arange
 
 struct ggml_tensor * ggml_arange(
```
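The reflective padding op lands in the same release as the new `examples/tts` target from the file list. A minimal caller-side sketch under the constraints asserted above (contiguous F32 input, pad lengths smaller than `ne[0]`); the helper name is illustrative:

```c
#include "ggml.h"

// Sketch: reflect-pad a 1-D F32 signal by two samples on each side,
// analogous to torch.nn.ReflectionPad1d(2). Assumes `signal` is contiguous,
// GGML_TYPE_F32, and has ne[0] > 2 so the asserts above hold.
static struct ggml_tensor * reflect_pad2(struct ggml_context * ctx,
                                         struct ggml_tensor  * signal) {
    return ggml_pad_reflect_1d(ctx, signal, /*p0*/ 2, /*p1*/ 2);
}
```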
```diff
@@ -4138,6 +4265,7 @@ struct ggml_tensor * ggml_argsort(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         enum ggml_sort_order  order) {
+    GGML_ASSERT(a->ne[0] <= INT32_MAX);
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
 
     ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -5019,8 +5147,10 @@ static void ggml_hash_map_free(struct hash_map * map) {
 }
 
 // utility functions to change gradients
-//
-//
+// isrc is the index of tensor in cgraph->visited_has_set.keys
+// the corresponding gradient (accumulators) are also at position isrc
+// if tensor has a gradient accumulator, modify that accumulator in-place
+// else if there is no gradient for tensor, set the corresponding value
 // else, just add/subtract/etc. the gradients
 
 static void ggml_add_or_set(
@@ -5028,11 +5158,14 @@ static void ggml_add_or_set(
         struct ggml_cgraph  * cgraph,
         size_t                isrc,
         struct ggml_tensor  * tensor) {
+    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
+    GGML_ASSERT(src);
     if (cgraph->grads[isrc]) {
-        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
+        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
     } else {
         cgraph->grads[isrc] = tensor;
     }
+    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
     ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
 }
 
@@ -5040,18 +5173,20 @@ static void ggml_acc_or_set(
         struct ggml_context * ctx,
         struct ggml_cgraph  * cgraph,
         size_t                isrc,
-        struct ggml_tensor  * src,
         struct ggml_tensor  * tensor,
         const  size_t         nb1,
         const  size_t         nb2,
         const  size_t         nb3,
         const  size_t         offset) {
+    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
+    GGML_ASSERT(src);
     if (cgraph->grads[isrc]) {
         cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
     } else {
         struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
         cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
     }
+    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
     ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
 }
 
@@ -5059,13 +5194,15 @@ static void ggml_add1_or_set(
         struct ggml_context * ctx,
         struct ggml_cgraph  * cgraph,
         size_t                isrc,
-        struct ggml_tensor  * src,
         struct ggml_tensor  * tensor) {
+    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
+    GGML_ASSERT(src);
     if (cgraph->grads[isrc]) {
         cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
     } else {
         cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
     }
+    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
     ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
 }
 
@@ -5074,11 +5211,14 @@ static void ggml_sub_or_set(
         struct ggml_cgraph  * cgraph,
         size_t                isrc,
         struct ggml_tensor  * tensor) {
+    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
+    GGML_ASSERT(src);
     if (cgraph->grads[isrc]) {
         cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
     } else {
         cgraph->grads[isrc] = ggml_neg(ctx, tensor);
     }
+    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
     ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
 }
 
@@ -5095,12 +5235,12 @@ static void ggml_compute_backward(
     struct ggml_tensor * src1 = tensor->src[1];
     struct ggml_tensor * src2 = tensor->src[2];
     struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
-    const size_t isrc0 = ggml_hash_find(hash_set, src0);
-    const size_t isrc1 = ggml_hash_find(hash_set, src1);
-    const size_t isrc2 = ggml_hash_find(hash_set, src2);
-    const bool src0_needs_grads = isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
-    const bool src1_needs_grads = isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
-    const bool src2_needs_grads = isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
+    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
+    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
+    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
+    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
+    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
+    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
 
     switch (tensor->op) {
         case GGML_OP_DUP: {
@@ -5200,7 +5340,7 @@ static void ggml_compute_backward(
         } break;
         case GGML_OP_SUM: {
             if (src0_needs_grads) {
-                ggml_add1_or_set(ctx, cgraph, isrc0,
+                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
             }
         } break;
         case GGML_OP_SUM_ROWS: {
@@ -5210,7 +5350,7 @@ static void ggml_compute_backward(
         } break;
         case GGML_OP_MEAN: {
             if (src0_needs_grads) {
-                ggml_add1_or_set(ctx, cgraph, isrc0,
+                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
             }
         } break;
         case GGML_OP_REPEAT: {
@@ -5363,7 +5503,7 @@ static void ggml_compute_backward(
                     nb3 = (nb3 / n0) * ng;
                 }
 
-                ggml_acc_or_set(ctx, cgraph, isrc0,
+                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
             }
         } break;
         case GGML_OP_PERMUTE: {
@@ -5597,10 +5737,9 @@ void ggml_build_backward_expand(
 
     const int n_nodes_f = cgraph->n_nodes;
 
-
-    memset(cgraph->
-
-    bool * grads_needed = calloc(hash_size, sizeof(bool));
+    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
+    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
+    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
 
     {
         bool any_params = false;
@@ -5621,7 +5760,7 @@ void ggml_build_backward_expand(
             continue;
         }
 
-        bool node_needs_grad = node->flags & GGML_TENSOR_FLAG_PARAM;
+        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
         bool ignore_src[GGML_MAX_SRC] = {false};
         switch (node->op) {
             // gradients in node->src[0] for one reason or another have no effect on output gradients
@@ -5638,7 +5777,7 @@ void ggml_build_backward_expand(
             } break;
 
             // gradients in node->src[1] for one reason or another have no effect on output gradients
-            case GGML_OP_CPY:           // gradients in CPY target
+            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
             case GGML_OP_GET_ROWS:      // row indices not differentiable
             case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
            case GGML_OP_ROPE:          // positions not differentiable
@@ -5665,9 +5804,12 @@ void ggml_build_backward_expand(
             node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
 
         const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
+        GGML_ASSERT(igrad != GGML_HASHSET_FULL);
+        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, igrad));
         if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
-            cgraph->
-            cgraph->
+            cgraph->grad_accs[igrad] = ggml_dup_tensor(ctx_static, node);
+            cgraph->grads[igrad]     = cgraph->grad_accs[igrad];
+            ggml_format_name(cgraph->grad_accs[igrad], "grad acc for %s", node->name);
         }
         grads_needed[igrad] = true;
     }
@@ -5761,15 +5903,15 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
 
 struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
     struct ggml_cgraph cgraph = {
-        /*.size
-        /*.n_nodes
-        /*.n_leafs
-        /*.nodes
-        /*.grads
-        /*.grad_accs
-        /*.leafs
-        /*.
-        /*.order
+        /*.size              =*/ 0,
+        /*.n_nodes           =*/ i1 - i0,
+        /*.n_leafs           =*/ 0,
+        /*.nodes             =*/ cgraph0->nodes + i0,
+        /*.grads             =*/ NULL, // gradients would need visited_hash_set
+        /*.grad_accs         =*/ NULL,
+        /*.leafs             =*/ NULL,
+        /*.visited_hash_set  =*/ { 0, NULL, NULL },
+        /*.order             =*/ cgraph0->order,
     };
 
     return cgraph;
@@ -5799,12 +5941,22 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
         }
     }
 
+    if (dst->grads) {
+        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
+        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
+    }
     if (src->grads) {
         GGML_ASSERT(dst->grads != NULL);
         GGML_ASSERT(dst->grad_accs != NULL);
         for (int i = 0; i < src->n_nodes; ++i) {
             const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
             const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
+
+            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
+            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
+            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
+            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
+
             dst->grads[igrad_dst]     = src->grads[igrad_src];
             dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
         }
@@ -5839,12 +5991,8 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
 
         if (node->op == GGML_OP_OPT_STEP_ADAMW) {
             // clear momenta
-
-
-            }
-            if (node->src[3]->data) {
-                ggml_set_zero(node->src[3]);
-            }
+            ggml_set_zero(node->src[2]);
+            ggml_set_zero(node->src[3]);
         }
 
         // initial gradients of loss should be 1, 0 otherwise
@@ -5923,12 +6071,12 @@ struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, co
 
 struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
     const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
-    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
+    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
 }
 
 struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
     const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
-    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
+    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
 }
 
 void ggml_graph_print(const struct ggml_cgraph * cgraph) {
@@ -6240,9 +6388,6 @@ size_t ggml_quantize_chunk(
         case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
-        case GGML_TYPE_Q4_0_4_4: result = quantize_q4_0_4x4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
-        case GGML_TYPE_Q4_0_4_8: result = quantize_q4_0_4x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
-        case GGML_TYPE_Q4_0_8_8: result = quantize_q4_0_8x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);
@@ -6378,7 +6523,7 @@ struct gguf_context {
     void * data;
 };
 
-
+size_t gguf_type_size(enum gguf_type type) {
     GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
     return GGUF_TYPE_SIZE[type];
 }
@@ -6506,13 +6651,7 @@ struct gguf_context * gguf_init_empty(void) {
     return ctx;
 }
 
-struct gguf_context *
-    FILE * file = ggml_fopen(fname, "rb");
-    if (!file) {
-        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
-        return NULL;
-    }
-
+struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
     // offset from start of file
     size_t offset = 0;
 
@@ -6525,7 +6664,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     for (uint32_t i = 0; i < sizeof(magic); i++) {
         if (magic[i] != GGUF_MAGIC[i]) {
             fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
-            fclose(file);
             return NULL;
         }
     }
@@ -6536,7 +6674,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
     if (!ctx) {
         fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
-        fclose(file);
         return NULL;
     }
 
@@ -6554,7 +6691,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     if (ctx->header.version == 1) {
         fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
-        fclose(file);
         gguf_free(ctx);
         return NULL;
     }
@@ -6567,7 +6703,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     if (!ok) {
         fprintf(stderr, "%s: failed to read header\n", __func__);
-        fclose(file);
         gguf_free(ctx);
         return NULL;
     }
@@ -6577,12 +6712,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         const uint64_t n_kv = ctx->header.n_kv;
 
-
-
-
-
-
-
+        if (n_kv > 0) {
+            ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
+            if (!ctx->kv) {
+                fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
+                gguf_free(ctx);
+                return NULL;
+            }
         }
 
         for (uint64_t i = 0; i < n_kv; ++i) {
@@ -6629,7 +6765,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                     // prevent from integer overflow in the malloc below
                     if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
                         fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                        fclose(file);
                         gguf_free(ctx);
                         return NULL;
                     }
@@ -6637,7 +6772,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                     kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
                     if (!kv->value.arr.data) {
                         fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                        fclose(file);
                         gguf_free(ctx);
                         return NULL;
                     }
@@ -6649,7 +6783,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                     // prevent from integer overflow in the malloc below
                     if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
                         fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                        fclose(file);
                         gguf_free(ctx);
                         return NULL;
                     }
@@ -6657,7 +6790,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                     kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
                     if (!kv->value.arr.data) {
                         fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                        fclose(file);
                         gguf_free(ctx);
                         return NULL;
                     }
@@ -6688,7 +6820,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     if (!ok) {
         fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
-        fclose(file);
         gguf_free(ctx);
         return NULL;
     }
@@ -6699,7 +6830,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
     if (!ctx->infos) {
         fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
-        fclose(file);
         gguf_free(ctx);
         return NULL;
     }
@@ -6735,7 +6865,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     if (!ok) {
         fprintf(stderr, "%s: failed to read tensor info\n", __func__);
-        fclose(file);
         gguf_free(ctx);
         return NULL;
     }
@@ -6774,10 +6903,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 (int64_t) info->ne[2] *
                 (int64_t) info->ne[3];
 
-            if (ggml_blck_size(info->type) == 0
+            if (ggml_blck_size(info->type) == 0 ) {
+                // this tensor type support have been removed:
+                fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
+                    __func__, info->name.data, (int) info->type, ggml_type_name(info->type));
+                gguf_free(ctx);
+                return NULL;
+            }
+
+            if (ne % ggml_blck_size(info->type) != 0) {
                 fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
                     __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
-                fclose(file);
                 gguf_free(ctx);
                 return NULL;
             }
@@ -6809,7 +6945,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         *params.ctx = ggml_init(pdata);
         if (*params.ctx == NULL) {
             fprintf(stderr, "%s: failed to initialize context\n", __func__);
-            fclose(file);
             gguf_free(ctx);
             return NULL;
         }
@@ -6828,7 +6963,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
         if (!ok) {
             fprintf(stderr, "%s: failed to read tensor data\n", __func__);
-            fclose(file);
             ggml_free(ctx_data);
             gguf_free(ctx);
             return NULL;
@@ -6867,7 +7001,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
         if (!ok) {
             fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
-            fclose(file);
             ggml_free(ctx_data);
             gguf_free(ctx);
             return NULL;
@@ -6876,11 +7009,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, params.no_alloc);
     }
 
-    fclose(file);
-
     return ctx;
 }
 
+struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
+    FILE * file = ggml_fopen(fname, "rb");
+    if (!file) {
+        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
+        return NULL;
+    }
+
+    struct gguf_context * result = gguf_init_from_file_impl(file, params);
+    fclose(file);
+    return result;
+}
+
 void gguf_free(struct gguf_context * ctx) {
     if (ctx == NULL) {
         return;
```
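The loader refactor above splits file handling out of the parser: `gguf_init_from_file_impl` works on an already-open `FILE *`, while the thin `gguf_init_from_file` wrapper owns the handle, which is why the per-error `fclose(file)` calls disappear. A caller-side sketch of loading metadata only through the public API; the helper name `load_gguf_meta` is hypothetical:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: open a GGUF file for metadata only and print its architecture key.
// Uses only the existing public gguf_* API that the refactored loader sits behind.
static void load_gguf_meta(const char * fname) {
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,   // metadata only, do not allocate tensor data
        /*.ctx      =*/ NULL,
    };
    struct gguf_context * ctx = gguf_init_from_file(fname, params);
    if (!ctx) {
        fprintf(stderr, "failed to load %s\n", fname);
        return;
    }
    const int key = gguf_find_key(ctx, "general.architecture");
    if (key >= 0) {
        printf("architecture: %s\n", gguf_get_val_str(ctx, key));
    }
    gguf_free(ctx);
}
```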
```diff
@@ -7340,13 +7483,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
 //    fwrite(val, sizeof(char), size, file);
 //}
 
-struct gguf_buf {
-    void * data;
-    size_t size;
-    size_t offset;
-};
-
-static struct gguf_buf gguf_buf_init(size_t size) {
+struct gguf_buf gguf_buf_init(size_t size) {
     struct gguf_buf buf = {
         /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
         /*buf.size =*/ size,
@@ -7356,7 +7493,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
     return buf;
 }
 
-
+void gguf_buf_free(struct gguf_buf buf) {
     if (buf.data) {
         GGML_FREE(buf.data);
     }
@@ -7394,7 +7531,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
     buf->offset += el_size;
 }
 
-
+void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
     // write header
     gguf_bwrite_el(buf, &ctx->header.magic,   sizeof(ctx->header.magic));
     gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
@@ -7549,3 +7686,26 @@ void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
     g_logger_state.log_callback_user_data = user_data;
 }
+
+void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
+    p->n_threads  = n_threads;
+    p->prio       = 0;     // default priority (usually means normal or inherited)
+    p->poll       = 50;    // hybrid-polling enabled
+    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
+    p->paused     = false; // threads are ready to go
+    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
+}
+
+struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
+    struct ggml_threadpool_params p;
+    ggml_threadpool_params_init(&p, n_threads);
+    return p;
+}
+
+bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
+    if (p0->n_threads  != p1->n_threads ) return false;
+    if (p0->prio       != p1->prio      ) return false;
+    if (p0->poll       != p1->poll      ) return false;
+    if (p0->strict_cpu != p1->strict_cpu) return false;
+    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
+}
```