@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
The remainder of this diff shows package/src/llama.cpp/ggml/include/ggml.h (+272 -505):

@@ -176,25 +176,15 @@
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#            define GGML_API __declspec(dllexport) extern
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define GGML_API __declspec(dllimport) extern
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define GGML_API __attribute__ ((visibility ("default"))) extern
 #    endif
 #else
-#    define GGML_API
-#endif
-
-#ifdef GGML_MULTIPLATFORM
-#    if defined(_WIN32)
-#        define GGML_CALL
-#    else
-#        define GGML_CALL __attribute__((__ms_abi__))
-#    endif
-#else
-#    define GGML_CALL
+#    define GGML_API extern
 #endif
 
 // TODO: support for clang
@@ -220,21 +210,24 @@
 #include <stdio.h>
 
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
 
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
 
 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
+#define GGML_MAX_N_THREADS      512
+#define GGML_MAX_OP_PARAMS      64
+
 #ifndef GGML_MAX_NAME
-#define GGML_MAX_NAME           64
+#    define GGML_MAX_NAME       64
 #endif
-#define GGML_MAX_OP_PARAMS      64
+
 #define GGML_DEFAULT_N_THREADS  4
 #define GGML_DEFAULT_GRAPH_SIZE 2048
+
 #if UINTPTR_MAX == 0xFFFFFFFF
 #define GGML_MEM_ALIGN 4
 #else
@@ -244,6 +237,8 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
+#define GGML_ROPE_TYPE_NEOX 2
+
 #define GGUF_MAGIC "GGUF"
 
 #define GGUF_VERSION 3
@@ -255,21 +250,21 @@
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
 #ifndef NDEBUG
-#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
+#    define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
-#define GGML_UNREACHABLE() __builtin_unreachable()
+#    define GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
-#define GGML_UNREACHABLE() __assume(0)
+#    define GGML_UNREACHABLE() __assume(0)
 #else
-#define GGML_UNREACHABLE() ((void) 0)
+#    define GGML_UNREACHABLE() ((void) 0)
 #endif
 
 #ifdef __cplusplus
-#define GGML_NORETURN [[noreturn]]
+#    define GGML_NORETURN [[noreturn]]
 #elif defined(_MSC_VER)
-#define GGML_NORETURN __declspec(noreturn)
+#    define GGML_NORETURN __declspec(noreturn)
 #else
-#define GGML_NORETURN _Noreturn
+#    define GGML_NORETURN _Noreturn
 #endif
 
 #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
@@ -334,7 +329,7 @@ extern "C" {
     };
 
     // get ggml_status name string
-    GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
+    GGML_API const char * ggml_status_to_string(enum ggml_status status);
 
     // ieee 754-2008 half-precision float16
     // todo: make this not an integral type
@@ -349,10 +344,12 @@ extern "C" {
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
     GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;
     struct ggml_context;
+    struct ggml_cgraph;
 
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum ggml_type {
@@ -390,6 +387,8 @@ extern "C" {
         GGML_TYPE_Q4_0_4_4 = 31,
         GGML_TYPE_Q4_0_4_8 = 32,
         GGML_TYPE_Q4_0_8_8 = 33,
+        GGML_TYPE_TQ1_0    = 34,
+        GGML_TYPE_TQ2_0    = 35,
         GGML_TYPE_COUNT,
     };
 
@@ -450,10 +449,13 @@ extern "C" {
         GGML_OP_SQR,
        GGML_OP_SQRT,
         GGML_OP_LOG,
+        GGML_OP_SIN,
+        GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_ARGMAX,
+        GGML_OP_COUNT_EQUAL,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
         GGML_OP_CONCAT,
@@ -487,9 +489,11 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_ARANGE,
@@ -505,6 +509,7 @@ extern "C" {
         GGML_OP_WIN_UNPART,
         GGML_OP_GET_REL_POS,
         GGML_OP_ADD_REL_POS,
+        GGML_OP_RWKV_WKV6,
 
         GGML_OP_UNARY,
 
@@ -521,6 +526,7 @@ extern "C" {
 
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        GGML_OP_OPT_STEP_ADAMW,
 
         GGML_OP_COUNT,
     };
@@ -539,6 +545,7 @@ extern "C" {
         GGML_UNARY_OP_SILU,
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
+        GGML_UNARY_OP_EXP,
 
         GGML_UNARY_OP_COUNT,
     };
@@ -550,35 +557,32 @@ extern "C" {
     };
 
     enum ggml_log_level {
-        GGML_LOG_LEVEL_ERROR = 2,
+        GGML_LOG_LEVEL_NONE  = 0,
+        GGML_LOG_LEVEL_DEBUG = 1,
+        GGML_LOG_LEVEL_INFO  = 2,
         GGML_LOG_LEVEL_WARN  = 3,
-        GGML_LOG_LEVEL_INFO  = 4,
-        GGML_LOG_LEVEL_DEBUG = 5
+        GGML_LOG_LEVEL_ERROR = 4,
+        GGML_LOG_LEVEL_CONT  = 5, // continue previous log
     };
 
+    // this tensor...
     enum ggml_tensor_flag {
-        GGML_TENSOR_FLAG_INPUT  = 1,
-        GGML_TENSOR_FLAG_OUTPUT = 2,
-        GGML_TENSOR_FLAG_PARAM  = 4,
+        GGML_TENSOR_FLAG_INPUT  =  1, // ...is an input for the GGML compute graph
+        GGML_TENSOR_FLAG_OUTPUT =  2, // ...is an output for the GGML compute graph
+        GGML_TENSOR_FLAG_PARAM  =  4, // ...contains trainable parameters
+        GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
-    // ggml object
-    struct ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct ggml_object * next;
-
-        enum ggml_object_type type;
-
-        char padding[4];
+    struct ggml_init_params {
+        // memory pool
+        size_t mem_size;   // bytes
+        void * mem_buffer; // if NULL, memory will be allocated internally
+        bool   no_alloc;   // don't allocate memory for the tensor data
     };
 
-    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
-
     // n-dimensional tensor
     struct ggml_tensor {
-        enum ggml_type         type;
+        enum ggml_type type;
 
         GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
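The `ggml_init_params` struct now sits directly above `struct ggml_tensor`; as later hunks show, it previously lived further down next to the removed scratch-buffer API. A minimal usage sketch (not from the package; the pool size is an arbitrary example):

```c
#include "ggml.h"

int main(void) {
    // 16 MiB memory pool, allocated internally by ggml
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,   // NULL: ggml allocates the pool itself
        /*.no_alloc   =*/ false,  // tensor data lives inside the pool
    };

    struct ggml_context * ctx = ggml_init(params);
    if (ctx == NULL) {
        return 1; // pool allocation failed
    }

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    (void) a; // build ops on 'a' here

    ggml_free(ctx);
    return 0;
}
```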
@@ -598,7 +602,6 @@ extern "C" {
 
         int32_t flags;
 
-        struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
 
         // source tensor and offset for views
@@ -611,7 +614,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        // char padding[4];
+        char padding[8];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -621,71 +624,6 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*ggml_abort_callback)(void * data);
 
-    // the compute plan that needs to be prepared for ggml_graph_compute()
-    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
-        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
-        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
-
-        int n_threads;
-
-        // abort ggml_graph_compute when true
-        ggml_abort_callback abort_callback;
-        void *              abort_callback_data;
-    };
-
-    enum ggml_cgraph_eval_order {
-        GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-
-    typedef uint32_t ggml_bitset_t;
-
-    struct ggml_hash_set {
-        size_t size;
-        ggml_bitset_t * used;
-        struct ggml_tensor ** keys;
-    };
-
-    // computation graph
-    struct ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-
-        struct ggml_tensor ** nodes;
-        struct ggml_tensor ** grads;
-        struct ggml_tensor ** leafs;
-
-        struct ggml_hash_set visited_hash_set;
-
-        enum ggml_cgraph_eval_order order;
-    };
-
-    // scratch buffer
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
-    struct ggml_init_params {
-        // memory pool
-        size_t mem_size;   // bytes
-        void * mem_buffer; // if NULL, memory will be allocated internally
-        bool   no_alloc;   // don't allocate memory for the tensor data
-    };
-
-    // numa strategies
-    enum ggml_numa_strategy {
-        GGML_NUMA_STRATEGY_DISABLED   = 0,
-        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
-        GGML_NUMA_STRATEGY_ISOLATE    = 2,
-        GGML_NUMA_STRATEGY_NUMACTL    = 3,
-        GGML_NUMA_STRATEGY_MIRROR     = 4,
-        GGML_NUMA_STRATEGY_COUNT
-    };
 
     //
     // GUID
@@ -708,52 +646,49 @@ extern "C" {
     // accepts a UTF-8 path, even on Windows
     GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
 
-    GGML_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
-    GGML_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node
-
     GGML_API void    ggml_print_object (const struct ggml_object * obj);
     GGML_API void    ggml_print_objects(const struct ggml_context * ctx);
 
-    GGML_API GGML_CALL int64_t ggml_nelements   (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL int64_t ggml_nrows       (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
-    GGML_API           size_t  ggml_nbytes_pad  (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
+    GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
+    GGML_API int64_t ggml_nrows     (const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_nbytes    (const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
 
-    GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
-    GGML_API GGML_CALL size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
-    GGML_API GGML_CALL size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
+    GGML_API int64_t ggml_blck_size(enum ggml_type type);
+    GGML_API size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
+    GGML_API size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
 
     GGML_DEPRECATED(
     GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
     "use ggml_row_size() instead");
 
-    GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
-    GGML_API GGML_CALL const char * ggml_op_name  (enum ggml_op   op);
-    GGML_API           const char * ggml_op_symbol(enum ggml_op   op);
+    GGML_API const char * ggml_type_name(enum ggml_type type);
+    GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    GGML_API const char * ggml_op_symbol(enum ggml_op   op);
 
-    GGML_API           const char * ggml_unary_op_name(enum ggml_unary_op op);
-    GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
+    GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+    GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
 
-    GGML_API GGML_CALL size_t  ggml_element_size(const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);
 
-    GGML_API GGML_CALL bool    ggml_is_quantized(enum ggml_type type);
+    GGML_API bool    ggml_is_quantized(enum ggml_type type);
 
     // TODO: temporary until model loading of ggml examples is refactored
     GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
 
-    GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_permuted  (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_empty     (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_scalar    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_vector    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_matrix    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_3d        (const struct ggml_tensor * tensor);
-    GGML_API           int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars
+    GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_empty     (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_scalar    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_vector    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_matrix    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_3d        (const struct ggml_tensor * tensor);
+    GGML_API int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars
 
-    GGML_API GGML_CALL bool ggml_is_contiguous  (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
-    GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
-    GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
+    GGML_API bool ggml_is_contiguous  (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
+    GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
+    GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
 
     GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
     GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@@ -767,12 +702,12 @@ extern "C" {
 
     // main
 
-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
+    GGML_API void                  ggml_reset(struct ggml_context * ctx);
+    GGML_API void                  ggml_free (struct ggml_context * ctx);
 
     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
 
-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
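`ggml_reset` is the new entry here. Judging by its placement between `ggml_init` and `ggml_free`, it rewinds a context's memory pool so it can be refilled without a free/init round trip; a hedged sketch (loop count and pool size are illustrative):

```c
struct ggml_init_params params = {
    /*.mem_size   =*/ 8 * 1024 * 1024,
    /*.mem_buffer =*/ NULL,
    /*.no_alloc   =*/ false,
};
struct ggml_context * ctx = ggml_init(params);

for (int step = 0; step < 100; step++) {
    // ... create tensors and build a graph in ctx ...
    ggml_reset(ctx); // recycle the pool instead of ggml_free + ggml_init
}

ggml_free(ctx);
```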
@@ -812,8 +747,7 @@ extern "C" {
             int64_t ne2,
             int64_t ne3);
 
-    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+    GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);
 
     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
     GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
@@ -823,35 +757,25 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
 
-    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
-    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
-
     // Converts a flat index into coordinates
-    GGML_API void    ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
+    GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
 
-    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
-
-    GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
-
-    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
-
-    GGML_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
 
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
 
-    GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
-
     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
     GGML_ATTRIBUTE_FORMAT(2, 3)
     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);
 
+    // Tensor flags
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
+
     //
     // operations on tensors with backpropagation
     //
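The flag setters are now grouped under a `// Tensor flags` heading; `ggml_set_loss` is new and pairs with the `GGML_TENSOR_FLAG_LOSS` bit added earlier in this diff, while `ggml_set_param` moves here from the automatic-differentiation section (see the later hunks). A sketch of marking up a small graph, assuming `ctx` from the init example above:

```c
struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
struct ggml_tensor * w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
struct ggml_tensor * y = ggml_mul(ctx, x, w);

ggml_set_input (x);      // GGML_TENSOR_FLAG_INPUT
ggml_set_param (ctx, w); // GGML_TENSOR_FLAG_PARAM: trainable
ggml_set_output(y);      // GGML_TENSOR_FLAG_OUTPUT: keep data after compute
```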
@@ -966,6 +890,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_sin(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sin_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return scalar
     GGML_API struct ggml_tensor * ggml_sum(
             struct ggml_context * ctx,
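`ggml_sin` and `ggml_cos` follow the same unary pattern as the neighbouring `ggml_sqr` and `ggml_log`, each with an out-of-place and an `_inplace` variant. A sketch, assuming `ctx` as before:

```c
struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

struct ggml_tensor * s = ggml_sin(ctx, t);         // result in a new tensor
struct ggml_tensor * c = ggml_cos_inplace(ctx, t); // reuses t's storage
```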
@@ -986,6 +926,12 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // count number of equal elements in a and b
+    GGML_API struct ggml_tensor * ggml_count_equal(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // if a is the same shape as b, and a is not parameter, return a
     // otherwise, return a new tensor: repeat(a) to fit in b
     GGML_API struct ggml_tensor * ggml_repeat(
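`ggml_count_equal` reduces two tensors to a single count of matching elements; an obvious use is classification accuracy together with `ggml_argmax` from the op list above. A hedged sketch in which `logits` and `labels` are assumed caller-provided tensors:

```c
// count rows of 'logits' whose argmax matches the corresponding label
struct ggml_tensor * pred    = ggml_argmax(ctx, logits);
struct ggml_tensor * n_match = ggml_count_equal(ctx, pred, labels);
// after graph evaluation, n_match holds the scalar number of equal elements
```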
@@ -1116,6 +1062,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_exp(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_exp_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
@@ -1139,16 +1093,17 @@ extern "C" {
 
     // group normalize along ne0*ne1*n_groups
     // used in stable-diffusion
-    // TODO: eps is hardcoded to 1e-6 for now
     GGML_API struct ggml_tensor * ggml_group_norm(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
 
     GGML_API struct ggml_tensor * ggml_group_norm_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
 
     // a - x
     // b - dy
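Both `ggml_group_norm` variants gain an explicit `eps` parameter. The removed TODO above pinned the old hard-coded epsilon at 1e-6, so a behaviour-preserving port passes exactly that value:

```c
// before: cur = ggml_group_norm(ctx, cur, n_groups);
// after, with the same numerics but eps now explicit:
cur = ggml_group_norm(ctx, cur, n_groups, 1e-6f);
```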
@@ -1210,7 +1165,7 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     GGML_API struct ggml_tensor * ggml_set_inplace(
@@ -1220,19 +1175,19 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     GGML_API struct ggml_tensor * ggml_set_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     GGML_API struct ggml_tensor * ggml_set_1d_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return modified a
     GGML_API struct ggml_tensor * ggml_set_2d(
@@ -1240,7 +1195,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     GGML_API struct ggml_tensor * ggml_set_2d_inplace(
@@ -1248,7 +1203,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // a -> b, return view(b)
     GGML_API struct ggml_tensor * ggml_cpy(
@@ -1383,14 +1338,14 @@ extern "C" {
     // supports 3D: a->ne[2] == b->ne[1]
     GGML_API struct ggml_tensor * ggml_get_rows(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * a,  // data
+            struct ggml_tensor  * b); // row indices
 
     GGML_API struct ggml_tensor * ggml_get_rows_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_tensor  * a,  // gradients of ggml_get_rows result
+            struct ggml_tensor  * b,  // row indices
+            struct ggml_tensor  * c); // data for ggml_get_rows, only used for its shape
 
     GGML_API struct ggml_tensor * ggml_diag(
         struct ggml_context     * ctx,
@@ -1451,11 +1406,10 @@ extern "C" {
             struct ggml_tensor  * b);
 
     // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
-    // if mode & 2 == 1, GPT-NeoX style
+    // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
+    // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
     //
     // b is an int32 vector with size a->ne[2], it contains the positions
-    // c is freq factors (e.g. phi3-128k), (optional)
     GGML_API struct ggml_tensor * ggml_rope(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1472,6 +1426,7 @@ extern "C" {
             int                   mode);
 
     // custom RoPE
+    // c is freq factors (e.g. phi3-128k), (optional)
    GGML_API struct ggml_tensor * ggml_rope_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1534,16 +1489,16 @@ extern "C" {
         "use ggml_rope_ext_inplace instead");
 
     // compute correction dims for YaRN RoPE scaling
-    GGML_CALL void ggml_rope_yarn_corr_dims(
+    GGML_API void ggml_rope_yarn_corr_dims(
         int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c,
+            struct ggml_tensor  * a, // gradients of ggml_rope result
+            struct ggml_tensor  * b, // positions
+            struct ggml_tensor  * c, // freq factors
             int                   n_dims,
             int                   mode,
             int                   n_ctx_orig,
@@ -1562,34 +1517,49 @@ extern "C" {
             float                 min,
             float                 max);
 
+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1,
-            bool                  is_2D,
-            enum ggml_type        dst_type);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // gradient of im2col output
+            int64_t             * ne, // shape of im2col input
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D);
 
     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
 
     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
             int                   s0,  // stride
             int                   p0,  // padding
             int                   d0); // dilation
@@ -1598,29 +1568,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor* ggml_conv_1d_ph(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s,  // stride
+            int                   d); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
 
 
     // kernel size is a->ne[0] x a->ne[1]
@@ -1682,6 +1652,18 @@ extern "C" {
             float                 p0,
             float                 p1);
 
+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
@@ -1756,12 +1738,16 @@ extern "C" {
             struct ggml_tensor  * v,
             struct ggml_tensor  * mask,
             float                 scale,
-            float                 max_bias);
+            float                 max_bias,
+            float                 logit_softcap);
 
     GGML_API void ggml_flash_attn_ext_set_prec(
             struct ggml_tensor * a,
             enum ggml_prec       prec);
 
+    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
+            const struct ggml_tensor * a);
+
     // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
            struct ggml_context * ctx,
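`ggml_flash_attn_ext` gains a trailing `logit_softcap` float (soft-capping of attention logits, as used by models such as Gemma-2), and `ggml_flash_attn_ext_get_prec` complements the existing `_set_prec`. Presumably passing `0.0f` disables the soft cap and reproduces the old behaviour; a migration sketch with all tensor arguments assumed:

```c
// old call: kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, max_bias);
// new call, keeping prior behaviour (no logit soft-capping):
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, max_bias, 0.0f);
```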
@@ -1773,10 +1759,8 @@ extern "C" {
 
     GGML_API struct ggml_tensor * ggml_ssm_conv(
             struct ggml_context * ctx,
-            struct ggml_tensor  * s,
-            struct ggml_tensor  * x,
-            struct ggml_tensor  * c,
-            struct ggml_tensor  * sq);
+            struct ggml_tensor  * sx,
+            struct ggml_tensor  * c);
 
     GGML_API struct ggml_tensor * ggml_ssm_scan(
             struct ggml_context * ctx,
@@ -1785,8 +1769,7 @@ extern "C" {
             struct ggml_tensor  * dt,
             struct ggml_tensor  * A,
             struct ggml_tensor  * B,
-            struct ggml_tensor  * C,
-            struct ggml_tensor  * sq);
+            struct ggml_tensor  * C);
 
     // partition into non-overlapping windows with padding if needed
     // example:
@@ -1838,6 +1821,15 @@ extern "C" {
             struct ggml_tensor  * pw,
             struct ggml_tensor  * ph);
 
+    GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * r,
+            struct ggml_tensor  * tf,
+            struct ggml_tensor  * td,
+            struct ggml_tensor  * state);
+
     // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1921,7 +1913,8 @@ extern "C" {
     typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
 
-    #define GGML_N_TASKS_MAX -1
+#define GGML_N_TASKS_MAX (-1)
+    // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
 
     GGML_API struct ggml_tensor * ggml_map_custom1(
             struct ggml_context   * ctx,
@@ -1974,49 +1967,59 @@ extern "C" {
     // loss function
 
     GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b); // labels
 
     GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b,  // labels
+            struct ggml_tensor  * c); // gradients of cross_entropy_loss result
+
+    // AdamW optimizer step
+    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
+    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
+    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * grad,
+            struct ggml_tensor  * m,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * adamw_params); // parameters such a the learning rate
 
     //
     // automatic differentiation
     //
 
-    GGML_API void ggml_set_param(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * tensor);
+    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(
+        struct ggml_context * ctx_static,  // context for static gradients (loss + gradient accumulation)
+        struct ggml_context * ctx_compute, // context for gradient computation
+        struct ggml_cgraph  * cgraph,
+        bool                  accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
 
-    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate);
+    // graph allocation in a context
+    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+    GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
+    GGML_API struct ggml_cgraph * ggml_graph_dup        (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void                 ggml_graph_cpy        (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void                 ggml_graph_reset      (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
+    GGML_API void                 ggml_graph_clear      (struct ggml_cgraph * cgraph);
 
+    GGML_API int                   ggml_graph_size   (struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_tensor *  ggml_graph_node   (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    GGML_API struct ggml_tensor ** ggml_graph_nodes  (struct ggml_cgraph * cgraph);
+    GGML_API int                   ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
 
-    // graph allocation in a context
-    GGML_API struct ggml_cgraph * ggml_new_graph         (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
-    GGML_API struct ggml_cgraph * ggml_new_graph_custom  (struct ggml_context * ctx, size_t size, bool grads);
-    GGML_API struct ggml_cgraph * ggml_graph_dup         (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_cgraph   ggml_graph_view        (struct ggml_cgraph * cgraph, int i0, int i1);
-    GGML_API void                 ggml_graph_cpy         (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void                 ggml_graph_reset       (struct ggml_cgraph * cgraph); // zero grads
-    GGML_API void                 ggml_graph_clear       (struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API size_t ggml_graph_overhead(void);
     GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
 
-    // ggml_graph_plan() has to be called before ggml_graph_compute()
-    // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan   (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
-    GGML_API enum ggml_status  ggml_graph_compute(      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-    // same as ggml_graph_compute() but the work data is allocated as a part of the context
-    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
-
-    GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (const struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
     GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
     GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
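Two things happen in this range: the loss primitives gain argument documentation plus the new `ggml_opt_step_adamw`, and graph construction moves fully behind accessor functions, which is necessary now that `struct ggml_cgraph` was removed from the public header earlier in this diff. A sketch of building and inspecting a graph through the opaque API, assuming `ctx` and a `result` tensor from the earlier sketches:

```c
struct ggml_cgraph * gf = ggml_new_graph(ctx);
ggml_build_forward_expand(gf, result);

// the graph struct fields are no longer visible; use the accessors
for (int i = 0; i < ggml_graph_n_nodes(gf); i++) {
    struct ggml_tensor * node = ggml_graph_node(gf, i);
    printf("%3d: %-16s %s\n", i, ggml_get_name(node), ggml_op_desc(node));
}
```

The final hunk below then deletes the old ADAM/LBFGS optimization interface wholesale. Per the file list at the top, its replacement is the new `ggml-opt.h`/`ggml-opt.cpp` module, and the removed `ggml_graph_plan`/`ggml_graph_compute` entry points appear to move into the new CPU backend header `ggml-cpu.h`.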
@@ -2027,197 +2030,14 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
 
-    // build gradient checkpointing backward graph gb for gf using provided checkpoints
-    // gb_tmp will contain original backward graph with rewritten backward process nodes,
-    // but without the second forward pass nodes.
-    GGML_API void ggml_build_backward_gradient_checkpointing(
-            struct ggml_context   * ctx,
-            struct ggml_cgraph    * gf,
-            struct ggml_cgraph    * gb,
-            struct ggml_cgraph    * gb_tmp,
-            struct ggml_tensor  * * checkpoints,
-            int                     n_checkpoints);
-    //
-    // optimization
-    //
-
-    // optimization methods
-    enum ggml_opt_type {
-        GGML_OPT_TYPE_ADAM,
-        GGML_OPT_TYPE_LBFGS,
-    };
-
-    // linesearch methods
-    enum ggml_linesearch {
-        GGML_LINESEARCH_DEFAULT = 1,
-
-        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
-        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
-        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
-    };
-
-    // optimization return values
-    enum ggml_opt_result {
-        GGML_OPT_RESULT_OK = 0,
-        GGML_OPT_RESULT_DID_NOT_CONVERGE,
-        GGML_OPT_RESULT_NO_CONTEXT,
-        GGML_OPT_RESULT_INVALID_WOLFE,
-        GGML_OPT_RESULT_FAIL,
-        GGML_OPT_RESULT_CANCEL,
-
-        GGML_LINESEARCH_FAIL = -128,
-        GGML_LINESEARCH_MINIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-        GGML_LINESEARCH_INVALID_PARAMETERS,
-    };
-
-    typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
+    // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
     typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
 
-    // optimization parameters
-    //
-    //   see ggml.c (ggml_opt_default_params) for default values
-    //
-    struct ggml_opt_params {
-        enum ggml_opt_type type;
-
-        size_t graph_size;
-
-        int n_threads;
-
-        // delta-based convergence test
-        //
-        //   if past == 0 - disabled
-        //   if past > 0:
-        //     stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
-        //
-        int past;
-        float delta;
-
-        // maximum number of iterations without improvement
-        //
-        //   if 0 - disabled
-        //   if > 0:
-        //     assume convergence if no cost improvement in this number of iterations
-        //
-        int max_no_improvement;
-
-        bool print_forward_graph;
-        bool print_backward_graph;
-
-        int n_gradient_accumulation;
-
-        // ADAM parameters
-        struct {
-            int n_iter;
-
-            float sched; // schedule multiplier (fixed, decay or warmup)
-            float decay; // weight decay for AdamW, use 0.0f to disable
-            int   decay_min_ndim; // minimum number of tensor dimension to apply weight decay
-            float alpha; // learning rate
-            float beta1;
-            float beta2;
-            float eps;   // epsilon for numerical stability
-            float eps_f; // epsilon for convergence test
-            float eps_g; // epsilon for convergence test
|
-
float gclip; // gradient clipping
|
|
2125
|
-
} adam;
|
|
2126
|
-
|
|
2127
|
-
// LBFGS parameters
|
|
2128
|
-
struct {
|
|
2129
|
-
int m; // number of corrections to approximate the inv. Hessian
|
|
2130
|
-
int n_iter;
|
|
2131
|
-
int max_linesearch;
|
|
2132
|
-
|
|
2133
|
-
float eps; // convergence tolerance
|
|
2134
|
-
float ftol; // line search tolerance
|
|
2135
|
-
float wolfe;
|
|
2136
|
-
float min_step;
|
|
2137
|
-
float max_step;
|
|
2138
|
-
|
|
2139
|
-
enum ggml_linesearch linesearch;
|
|
2140
|
-
} lbfgs;
|
|
2141
|
-
};
|
|
2142
|
-
|
|
2143
|
-
struct ggml_opt_context {
|
|
2144
|
-
struct ggml_context * ctx;
|
|
2145
|
-
struct ggml_opt_params params;
|
|
2146
|
-
|
|
2147
|
-
int iter;
|
|
2148
|
-
int64_t nx; // number of parameter elements
|
|
2149
|
-
|
|
2150
|
-
bool just_initialized;
|
|
2151
|
-
|
|
2152
|
-
float loss_before;
|
|
2153
|
-
float loss_after;
|
|
2154
|
-
|
|
2155
|
-
struct {
|
|
2156
|
-
struct ggml_tensor * g; // current gradient
|
|
2157
|
-
struct ggml_tensor * m; // first moment
|
|
2158
|
-
struct ggml_tensor * v; // second moment
|
|
2159
|
-
struct ggml_tensor * pf; // past function values
|
|
2160
|
-
float fx_best;
|
|
2161
|
-
float fx_prev;
|
|
2162
|
-
int n_no_improvement;
|
|
2163
|
-
} adam;
|
|
2164
|
-
|
|
2165
|
-
struct {
|
|
2166
|
-
struct ggml_tensor * x; // current parameters
|
|
2167
|
-
struct ggml_tensor * xp; // previous parameters
|
|
2168
|
-
struct ggml_tensor * g; // current gradient
|
|
2169
|
-
struct ggml_tensor * gp; // previous gradient
|
|
2170
|
-
struct ggml_tensor * d; // search direction
|
|
2171
|
-
struct ggml_tensor * pf; // past function values
|
|
2172
|
-
struct ggml_tensor * lmal; // the L-BFGS memory alpha
|
|
2173
|
-
struct ggml_tensor * lmys; // the L-BFGS memory ys
|
|
2174
|
-
struct ggml_tensor * lms; // the L-BFGS memory s
|
|
2175
|
-
struct ggml_tensor * lmy; // the L-BFGS memory y
|
|
2176
|
-
float fx_best;
|
|
2177
|
-
float step;
|
|
2178
|
-
int j;
|
|
2179
|
-
int k;
|
|
2180
|
-
int end;
|
|
2181
|
-
int n_no_improvement;
|
|
2182
|
-
} lbfgs;
|
|
2183
|
-
};
|
|
2184
|
-
|
|
2185
|
-
GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
|
|
2186
|
-
|
|
2187
|
-
// optimize the function defined by the tensor f
|
|
2188
|
-
GGML_API enum ggml_opt_result ggml_opt(
|
|
2189
|
-
struct ggml_context * ctx,
|
|
2190
|
-
struct ggml_opt_params params,
|
|
2191
|
-
struct ggml_tensor * f);
|
|
2192
|
-
|
|
2193
|
-
// initialize optimizer context
|
|
2194
|
-
GGML_API void ggml_opt_init(
|
|
2195
|
-
struct ggml_context * ctx,
|
|
2196
|
-
struct ggml_opt_context * opt,
|
|
2197
|
-
struct ggml_opt_params params,
|
|
2198
|
-
int64_t nx);
|
|
2199
|
-
|
|
2200
|
-
// continue optimizing the function defined by the tensor f
|
|
2201
|
-
GGML_API enum ggml_opt_result ggml_opt_resume(
|
|
2202
|
-
struct ggml_context * ctx,
|
|
2203
|
-
struct ggml_opt_context * opt,
|
|
2204
|
-
struct ggml_tensor * f);
|
|
2205
|
-
|
|
2206
|
-
// continue optimizing the function defined by the tensor f
|
|
2207
|
-
GGML_API enum ggml_opt_result ggml_opt_resume_g(
|
|
2208
|
-
struct ggml_context * ctx,
|
|
2209
|
-
struct ggml_opt_context * opt,
|
|
2210
|
-
struct ggml_tensor * f,
|
|
2211
|
-
struct ggml_cgraph * gf,
|
|
2212
|
-
struct ggml_cgraph * gb,
|
|
2213
|
-
ggml_opt_callback callback,
|
|
2214
|
-
void * callback_data);
|
|
2036
|
+
// Set callback for all future logging events.
|
|
2037
|
+
// If this is not called, or NULL is supplied, everything is output on stderr.
|
|
2038
|
+
GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
|
|
2215
2039
|
|
|
2216
|
-
|
|
2217
|
-
// tensor flags
|
|
2218
|
-
//
|
|
2219
|
-
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
|
2220
|
-
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
|
2040
|
+
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
|
2221
2041
|
|
|
2222
2042
|
//
|
|
2223
2043
|
// quantization
|
|
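Besides dropping the old ADAM/L-BFGS optimization interface, this hunk adds two small public entry points: `ggml_log_set()`, which routes all future log lines through one callback (stderr when unset or passed NULL), and `ggml_set_zero()`, whose declaration returns a tensor pointer and so can be chained with allocation. A hedged sketch exercising both; the warnings-only filter is invented for illustration:

```c
#include <stdio.h>
#include "ggml.h"

// matches the ggml_log_callback typedef kept in this hunk
static void quiet_log(enum ggml_log_level level, const char * text, void * user_data) {
    FILE * out = (FILE *) user_data;
    // pass through only warnings and errors; drop info/debug chatter
    if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR) {
        fputs(text, out);
    }
}

int main(void) {
    ggml_log_set(quiet_log, stderr); // without this call, everything goes to stderr unfiltered

    struct ggml_init_params ip = {
        /*.mem_size   =*/ 1024 * 1024, // 1 MiB, arbitrary for this sketch
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // ggml_set_zero() returns its argument, so allocation and zeroing chain cleanly
    struct ggml_tensor * t = ggml_set_zero(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8));
    printf("first element after zeroing: %f\n", ggml_get_f32_1d(t, 0));

    ggml_free(ctx);
    return 0;
}
```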
@@ -2374,43 +2194,6 @@ extern "C" {
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
 
-    //
-    // system info
-    //
-
-    GGML_API int ggml_cpu_has_avx        (void);
-    GGML_API int ggml_cpu_has_avx_vnni   (void);
-    GGML_API int ggml_cpu_has_avx2       (void);
-    GGML_API int ggml_cpu_has_avx512     (void);
-    GGML_API int ggml_cpu_has_avx512_vbmi(void);
-    GGML_API int ggml_cpu_has_avx512_vnni(void);
-    GGML_API int ggml_cpu_has_avx512_bf16(void);
-    GGML_API int ggml_cpu_has_fma        (void);
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_sve        (void);
-    GGML_API int ggml_cpu_has_arm_fma    (void);
-    GGML_API int ggml_cpu_has_metal      (void);
-    GGML_API int ggml_cpu_has_f16c       (void);
-    GGML_API int ggml_cpu_has_fp16_va    (void);
-    GGML_API int ggml_cpu_has_wasm_simd  (void);
-    GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_vulkan     (void);
-    GGML_API int ggml_cpu_has_kompute    (void);
-    GGML_API int ggml_cpu_has_gpublas    (void);
-    GGML_API int ggml_cpu_has_sse3       (void);
-    GGML_API int ggml_cpu_has_ssse3      (void);
-    GGML_API int ggml_cpu_has_sycl       (void);
-    GGML_API int ggml_cpu_has_rpc        (void);
-    GGML_API int ggml_cpu_has_vsx        (void);
-    GGML_API int ggml_cpu_has_matmul_int8(void);
-    GGML_API int ggml_cpu_has_cann       (void);
-    GGML_API int ggml_cpu_has_llamafile  (void);
-
-    //
-    // Internal types and functions exposed for tests and benchmarks
-    //
-
 #ifdef __cplusplus
 // restrict not standard in C++
 #define GGML_RESTRICT
@@ -2419,34 +2202,18 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
-
-
-    typedef void (*ggml_vec_dot_t)  (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
-                                     const void * GGML_RESTRICT y, size_t by, int nrc);
-    typedef void (*ggml_gemv_t)     (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                     const void * GGML_RESTRICT y, int nr, int nc);
-    typedef void (*ggml_gemm_t)     (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                     const void * GGML_RESTRICT y, int nr, int nc);
-
-    typedef struct {
+
+    struct ggml_type_traits {
         const char             * type_name;
         int64_t                  blck_size;
         int64_t                  blck_size_interleave; // interleave elements in blocks
         size_t                   type_size;
         bool                     is_quantized;
         ggml_to_float_t          to_float;
-        ggml_from_float_t        from_float;
         ggml_from_float_t        from_float_ref;
-
-
-
-        int64_t                  nrows; // number of rows to process simultaneously
-        int64_t                  ncols; // number of columns to process simultaneously
-        ggml_gemv_t              gemv;
-        ggml_gemm_t              gemm;
-    } ggml_type_traits_t;
-
-    GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
+    };
+
+    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
 
 #ifdef __cplusplus
 }
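The final hunk replaces the by-value `ggml_type_traits_t` typedef (and its internal `gemv`/`gemm` kernel pointers) with a plain `struct ggml_type_traits` handed out by pointer from the renamed `ggml_get_type_traits()`. A sketch of the new lookup, assuming only the fields visible in the diff above:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // traits now come back as a const pointer rather than a struct copy
    const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
    printf("%s: %lld elements per block, %zu bytes per block, quantized=%d\n",
           tt->type_name,
           (long long) tt->blck_size,
           tt->type_size,
           (int) tt->is_quantized);
    return 0;
}
```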