@fugood/llama.node 0.3.3 → 0.3.4
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
package/src/llama.cpp/ggml/CMakeLists.txt

@@ -32,7 +32,15 @@ else()
     endif()
 endif()
 
+# remove the lib prefix on win32 mingw
+if (WIN32)
+    set(CMAKE_STATIC_LIBRARY_PREFIX "")
+    set(CMAKE_SHARED_LIBRARY_PREFIX "")
+    set(CMAKE_SHARED_MODULE_PREFIX  "")
+endif()
+
 option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
 
 #
 # option list

@@ -66,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
 endif()
 
 # general
-option(GGML_STATIC "ggml: static link libraries"
-option(GGML_NATIVE "ggml:
-option(GGML_LTO    "ggml: enable link time optimization"
-option(GGML_CCACHE "ggml: use ccache if available"
+option(GGML_STATIC "ggml: static link libraries" OFF)
+option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
+option(GGML_LTO    "ggml: enable link time optimization" OFF)
+option(GGML_CCACHE "ggml: use ccache if available" ON)
 
 # debug
 option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)

@@ -91,28 +99,34 @@ else()
     set(INS_ENB ON)
 endif()
 
-option(GGML_CPU_HBM
-option(GGML_CPU_AARCH64
-
-option(
-option(GGML_AVX2
-option(GGML_AVX512
-option(GGML_AVX512_VBMI
-option(GGML_AVX512_VNNI
-option(GGML_AVX512_BF16
-option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
-option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
-option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
-option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
+option(GGML_CPU_HBM     "ggml: use memkind for CPU HBM" OFF)
+option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
+option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
+option(GGML_AVX_VNNI    "ggml: enable AVX-VNNI"    OFF)
+option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
+option(GGML_AVX512      "ggml: enable AVX512F"     OFF)
+option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
+option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
+option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
 if (NOT MSVC)
-
+    # in MSVC F16C and FMA is implied with AVX2/AVX512
+    option(GGML_FMA      "ggml: enable FMA"      ${INS_ENB})
+    option(GGML_F16C     "ggml: enable F16C"     ${INS_ENB})
+    # MSVC does not seem to support AMX
+    option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
+    option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
+    option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
 endif()
-option(GGML_LASX
-option(GGML_LSX
-option(
+option(GGML_LASX "ggml: enable lasx" ON)
+option(GGML_LSX  "ggml: enable lsx"  ON)
+option(GGML_RVV  "ggml: enable rvv"  ON)
+
+option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
+set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
+
 
 if (WIN32)
-    set(GGML_WIN_VER "0x602" CACHE STRING
+    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
 endif()
 
 # ggml core

@@ -159,11 +173,17 @@ set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
 set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
 option(GGML_OPENMP "ggml: use OpenMP" ON)
 option(GGML_RPC    "ggml: use RPC"    OFF)
-option(GGML_AMX    "ggml: use AMX"    OFF)
 option(GGML_SYCL     "ggml: use SYCL" OFF)
 option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
 set (GGML_SYCL_TARGET "INTEL" CACHE STRING
     "ggml: sycl target device")
+set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
+    "ggml: sycl device architecture")
+
+option(GGML_OPENCL                    "ggml: use OpenCL"                                OFF)
+option(GGML_OPENCL_PROFILING          "ggml: use OpenCL profiling (increases overhead)" OFF)
+option(GGML_OPENCL_EMBED_KERNELS      "ggml: embed kernels"                             ON)
+option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno"          ON)
 
 # extra artifacts
 option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})

@@ -176,11 +196,7 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
 
-
-    set(CMAKE_CXX_STANDARD 17)
-else()
-    set(CMAKE_CXX_STANDARD 11)
-endif()
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)

@@ -233,12 +249,8 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()
-install(TARGETS ggml PUBLIC_HEADER)
-
-if (BUILD_SHARED_LIBS)
-    install(TARGETS ggml LIBRARY)
-    install(TARGETS ggml-base LIBRARY)
-endif()
+install(TARGETS ggml LIBRARY PUBLIC_HEADER)
+install(TARGETS ggml-base LIBRARY)
 
 # FIXME: this should be done in the backend cmake files
 if (GGML_METAL)
package/src/llama.cpp/ggml/include/ggml-backend.h

@@ -190,6 +190,14 @@ extern "C" {
     typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
     // Get additional buffer types provided by the device (returns a NULL-terminated array)
     typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
+    // Set the abort callback for the backend
+    typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
+    // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
+    struct ggml_backend_feature {
+        const char * name;
+        const char * value;
+    };
+    typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
 
     //
     // Backend registry

@@ -214,6 +222,14 @@ extern "C" {
     // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
     GGML_API ggml_backend_t ggml_backend_init_best(void);
 
+    // Load a backend from a dynamic library and register it
+    GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
+    // Unload a backend if loaded dynamically and unregister it
+    GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
+    // Load all known backends from dynamic libraries
+    GGML_API void ggml_backend_load_all(void);
+    GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
+
     //
     // Backend scheduler
     //
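The dynamic backend loading entry points added above can be driven from application code. A minimal sketch, not part of this package: it only assumes the declarations shown in this hunk plus the pre-existing ggml_backend_name() and ggml_backend_free() helpers from the same header.

    #include <stdio.h>
    #include "ggml-backend.h"

    int main(void) {
        // register every backend that was built as a dynamic library (GGML_BACKEND_DL)
        ggml_backend_load_all();

        // a single backend library could also be loaded explicitly:
        // ggml_backend_reg_t reg = ggml_backend_load("./libggml-vulkan.so");

        // pick the best available device and create a backend instance for it
        ggml_backend_t backend = ggml_backend_init_best();
        if (backend == NULL) {
            fprintf(stderr, "no usable backend found\n");
            return 1;
        }
        printf("using backend: %s\n", ggml_backend_name(backend));

        ggml_backend_free(backend);
        return 0;
    }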
package/src/llama.cpp/ggml/include/ggml-cpu.h

@@ -7,29 +7,6 @@
 extern "C" {
 #endif
 
-    // Scheduling priorities
-    enum ggml_sched_priority {
-        GGML_SCHED_PRIO_NORMAL,
-        GGML_SCHED_PRIO_MEDIUM,
-        GGML_SCHED_PRIO_HIGH,
-        GGML_SCHED_PRIO_REALTIME
-    };
-
-    // Threadpool params
-    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
-    struct ggml_threadpool_params {
-        bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
-        int n_threads;                    // number of threads
-        enum ggml_sched_priority prio;    // thread priority
-        uint32_t poll;                    // polling level (0 - no polling, 100 - aggressive polling)
-        bool strict_cpu;                  // strict cpu placement
-        bool paused;                      // start in paused state
-    };
-
-    struct ggml_threadpool; // forward declaration, see ggml.c
-
-    typedef struct ggml_threadpool * ggml_threadpool_t;
-
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {

@@ -75,14 +52,11 @@ extern "C" {
     GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
     GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
 
-    GGML_BACKEND_API struct
-    GGML_BACKEND_API void
-    GGML_BACKEND_API
-    GGML_BACKEND_API struct ggml_threadpool *
-    GGML_BACKEND_API void
-    GGML_BACKEND_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
-    GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
-    GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new           (struct ggml_threadpool_params * params);
+    GGML_BACKEND_API void                     ggml_threadpool_free          (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API int                      ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                     ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                     ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data

@@ -104,10 +78,10 @@ extern "C" {
     GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
     GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
     GGML_BACKEND_API int ggml_cpu_has_avx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
     GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
     GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
     GGML_BACKEND_API int ggml_cpu_has_fma        (void);
-    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
     GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
     GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
     GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);

@@ -117,6 +91,7 @@ extern "C" {
     GGML_BACKEND_API int ggml_cpu_has_neon       (void);
     GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
     GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
+    GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
     GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
     GGML_BACKEND_API int ggml_cpu_has_sve        (void);
     GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // sve vector length in bytes

@@ -128,24 +103,14 @@ extern "C" {
 
     // Internal types and functions exposed for tests and benchmarks
 
-    typedef void (*ggml_from_float_to_mat_t)
-                                    (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
     typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                     const void * GGML_RESTRICT y, size_t by, int nrc);
-    typedef void (*ggml_gemv_t)    (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                    const void * GGML_RESTRICT y, int nr, int nc);
-    typedef void (*ggml_gemm_t)    (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
-                                    const void * GGML_RESTRICT y, int nr, int nc);
 
     struct ggml_type_traits_cpu {
         ggml_from_float_t        from_float;
-        ggml_from_float_to_mat_t from_float_to_mat;
         ggml_vec_dot_t           vec_dot;
         enum ggml_type           vec_dot_type;
         int64_t                  nrows; // number of rows to process simultaneously
-        int64_t                  ncols; // number of columns to process simultaneously
-        ggml_gemv_t              gemv;
-        ggml_gemm_t              gemm;
     };
 
     GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

@@ -165,13 +130,6 @@ extern "C" {
 
     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
 
-#ifdef GGML_USE_CPU_HBM
-    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
-#endif
-
-    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
-    GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
-
 #ifdef __cplusplus
 }
 #endif
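The relocated ggml_cpu_has_avx_vnni() and the new ggml_cpu_has_dotprod() are queried like the other feature probes in this header. A minimal sketch, hypothetical and not shipped with the package:

    #include <stdio.h>
    #include "ggml-cpu.h"

    int main(void) {
        // each probe returns a non-zero value when the feature is available on the running CPU
        printf("AVX      : %d\n", ggml_cpu_has_avx());
        printf("AVX_VNNI : %d\n", ggml_cpu_has_avx_vnni());
        printf("AVX2     : %d\n", ggml_cpu_has_avx2());
        printf("NEON     : %d\n", ggml_cpu_has_neon());
        printf("DOTPROD  : %d\n", ggml_cpu_has_dotprod());
        return 0;
    }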
package/src/llama.cpp/ggml/include/ggml-opencl.h

@@ -0,0 +1,26 @@
+#ifndef GGML_OPENCL_H
+#define GGML_OPENCL_H
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+//
+// backend API
+//
+GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
+GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
+
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif // GGML_OPENCL_H
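The new header only declares the OpenCL backend entry points. A minimal usage sketch, hypothetical and assuming a build configured with GGML_OPENCL=ON (ggml_backend_free comes from ggml-backend.h, which this header includes):

    #include <stdio.h>
    #include "ggml-opencl.h"

    int main(void) {
        ggml_backend_t backend = ggml_backend_opencl_init();
        if (backend == NULL || !ggml_backend_is_opencl(backend)) {
            fprintf(stderr, "OpenCL backend not available\n");
            return 1;
        }
        // ... allocate buffers via ggml_backend_opencl_buffer_type() and run graphs ...
        ggml_backend_free(backend);
        return 0;
    }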
package/src/llama.cpp/ggml/include/ggml.h

@@ -237,7 +237,9 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
-#define GGML_ROPE_TYPE_NEOX
+#define GGML_ROPE_TYPE_NEOX   2
+#define GGML_ROPE_TYPE_MROPE  8
+#define GGML_ROPE_TYPE_VISION 24
 
 #define GGUF_MAGIC "GGUF"
 

@@ -384,12 +386,15 @@ extern "C" {
         GGML_TYPE_F64     = 28,
         GGML_TYPE_IQ1_M   = 29,
         GGML_TYPE_BF16    = 30,
-        GGML_TYPE_Q4_0_4_4 = 31,
-        GGML_TYPE_Q4_0_4_8 = 32,
-        GGML_TYPE_Q4_0_8_8 = 33,
+        // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
+        // GGML_TYPE_Q4_0_4_8 = 32,
+        // GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0   = 34,
         GGML_TYPE_TQ2_0   = 35,
-
+        // GGML_TYPE_IQ4_NL_4_4 = 36,
+        // GGML_TYPE_IQ4_NL_4_8 = 37,
+        // GGML_TYPE_IQ4_NL_8_8 = 38,
+        GGML_TYPE_COUNT   = 39,
     };
 
     // precision

@@ -430,9 +435,6 @@ extern "C" {
         GGML_FTYPE_MOSTLY_IQ4_XS   = 22, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ1_M    = 23, // except 1d tensors
         GGML_FTYPE_MOSTLY_BF16     = 24, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
-        GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
     };
 
     // available tensor operations:

@@ -496,6 +498,7 @@ extern "C" {
         GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
+        GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,

@@ -1442,6 +1445,22 @@ extern "C" {
             float                 beta_fast,
             float                 beta_slow);
 
+    GGML_API struct ggml_tensor * ggml_rope_multi(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
+            int                   n_dims,
+            int                   sections[4],
+            int                   mode,
+            int                   n_ctx_orig,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow);
+
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
             struct ggml_context * ctx,

@@ -1545,17 +1564,6 @@ extern "C" {
             int                   d1, // dilation dimension 1
             bool                  is_2D);
 
-    GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,  // convolution kernel
-            struct ggml_tensor  * b,  // data
-            int                   s0, // stride dimension 0
-            int                   s1, // stride dimension 1
-            int                   p0, // padding dimension 0
-            int                   p1, // padding dimension 1
-            int                   d0, // dilation dimension 0
-            int                   d1); // dilation dimension 1
-
     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,  // convolution kernel

@@ -1573,6 +1581,23 @@ extern "C" {
             int                   s,  // stride
             int                   d); // dilation
 
+    // depthwise
+    // TODO: this is very likely wrong for some cases! - needs more testing
+    GGML_API struct ggml_tensor * ggml_conv_1d_dw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation
+
+    GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   d0); // dilation
+
     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,  // convolution kernel

@@ -1592,7 +1617,6 @@ extern "C" {
             int                   d0, // dilation dimension 0
             int                   d1); // dilation dimension 1
 
-
     // kernel size is a->ne[0] x a->ne[1]
     // stride is equal to kernel size
     // padding is zero

@@ -1619,6 +1643,18 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // depthwise
+    GGML_API struct ggml_tensor * ggml_conv_2d_dw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,

@@ -1692,6 +1728,13 @@ extern "C" {
             int                   p2,
             int                   p3);
 
+    // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
+    GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   p0,
+            int                   p1);
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
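The new ggml_pad_reflect_1d operator follows the usual graph-building pattern. A minimal CPU-only sketch, hypothetical: only the ggml_pad_reflect_1d signature comes from this diff, the context and graph helpers are standard ggml API.

    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // a 4-element row [a, b, c, d]
        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        for (int i = 0; i < 4; i++) {
            ggml_set_f32_1d(x, i, (float) (i + 1));
        }

        // reflect-pad one element on each side -> [b, a, b, c, d, c]
        struct ggml_tensor * y = ggml_pad_reflect_1d(ctx, x, 1, 1);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

        ggml_free(ctx);
        return 0;
    }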
package/src/llama.cpp/ggml/include/ggml.h (continued)

@@ -2194,11 +2237,19 @@ extern "C" {
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
     GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
 
-#ifdef
-// restrict not standard in C++
-#
+#ifdef __cplusplus
+    // restrict not standard in C++
+#   if defined(__GNUC__)
+#       define GGML_RESTRICT __restrict__
+#   elif defined(__clang__)
+#       define GGML_RESTRICT __restrict
+#   elif defined(_MSC_VER)
+#       define GGML_RESTRICT __restrict
+#   else
+#       define GGML_RESTRICT
+#   endif
 #else
-#define GGML_RESTRICT restrict
+#   define GGML_RESTRICT restrict
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);

@@ -2215,6 +2266,37 @@ extern "C" {
 
     GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
 
+    // ggml threadpool
+    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
+    // the goal should be to create an API that other backends can use move everything to the ggml base
+
+    // scheduling priorities
+    enum ggml_sched_priority {
+        GGML_SCHED_PRIO_NORMAL,
+        GGML_SCHED_PRIO_MEDIUM,
+        GGML_SCHED_PRIO_HIGH,
+        GGML_SCHED_PRIO_REALTIME
+    };
+
+    // threadpool params
+    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
+    struct ggml_threadpool_params {
+        bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int n_threads;                    // number of threads
+        enum ggml_sched_priority prio;    // thread priority
+        uint32_t poll;                    // polling level (0 - no polling, 100 - aggressive polling)
+        bool strict_cpu;                  // strict cpu placement
+        bool paused;                      // start in paused state
+    };
+
+    struct ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct ggml_threadpool * ggml_threadpool_t;
+
+    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
+    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
+
 #ifdef __cplusplus
 }
 #endif
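With the params and priority types now declared in ggml.h while the threadpool object functions stay in ggml-cpu.h, a threadpool can be set up as in this minimal sketch, hypothetical and based only on the declarations shown above:

    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
        tpp.prio = GGML_SCHED_PRIO_HIGH; // scheduling priority, declared in ggml.h

        struct ggml_threadpool * tp = ggml_threadpool_new(&tpp);

        // ... attach the threadpool to a CPU compute plan / backend and run graphs ...
        ggml_threadpool_pause(tp);
        ggml_threadpool_resume(tp);

        ggml_threadpool_free(tp);
        return 0;
    }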
package/src/llama.cpp/ggml/src/CMakeLists.txt

@@ -24,7 +24,7 @@ if (NOT MSVC)
     endif()
 endif()
 
-function(
+function(ggml_get_flags CCID CCVER)
     set(C_FLAGS "")
     set(CXX_FLAGS "")
 

@@ -41,6 +41,7 @@ function(get_flags CCID CCVER)
     elseif (CCID STREQUAL "GNU")
         set(C_FLAGS   -Wdouble-promotion)
         set(CXX_FLAGS -Wno-array-bounds)
+
         if (CCVER VERSION_GREATER_EQUAL 8.1.0)
             list(APPEND CXX_FLAGS -Wextra-semi)
         endif()

@@ -69,7 +70,7 @@ if (GGML_ALL_WARNINGS)
     list(APPEND C_FLAGS   ${WARNING_FLAGS})
     list(APPEND CXX_FLAGS ${WARNING_FLAGS})
 
-
+    ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
 
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
                         "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")

@@ -193,15 +194,14 @@ endif()
 
 if (WIN32)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
-
-    if (BUILD_SHARED_LIBS)
-        # TODO: should not use this
-        set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-    endif()
 endif()
 
 # ggml
 
+if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
+    message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
+endif()
+
 add_library(ggml-base
             ../include/ggml.h
             ../include/ggml-alloc.h

@@ -215,9 +215,7 @@ add_library(ggml-base
             ggml-threading.cpp
             ggml-threading.h
             ggml-quants.c
-            ggml-quants.h
-            ggml-aarch64.c
-            ggml-aarch64.h)
+            ggml-quants.h)
 
 target_include_directories(ggml-base PRIVATE .)
 

@@ -226,44 +224,95 @@ add_library(ggml
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+    target_link_libraries(ggml PRIVATE dl)
+endif()
+
+function(ggml_add_backend_library backend)
+    if (GGML_BACKEND_DL)
+        add_library(${backend} MODULE ${ARGN})
+        # write the shared library to the output directory
+        set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
+    else()
+        add_library(${backend} ${ARGN})
+        target_link_libraries(ggml PUBLIC ${backend})
+        install(TARGETS ${backend} LIBRARY)
+    endif()
+
+    target_link_libraries(${backend} PRIVATE ggml-base)
+    target_include_directories(${backend} PRIVATE ..)
+
+    if (${BUILD_SHARED_LIBS})
+        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
+        target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED)
+    endif()
+endfunction()
+
 function(ggml_add_backend backend)
     string(TOUPPER "GGML_${backend}" backend_id)
     if (${backend_id})
         string(TOLOWER "ggml-${backend}" backend_target)
         add_subdirectory(${backend_target})
-
-
-        # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
-        if (${backend_id})
-            message(STATUS "Including ${backend} backend")
-            if (${BUILD_SHARED_LIBS})
-                target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
-                target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
-                install(TARGETS ${backend_target} LIBRARY)
-            endif()
-            target_link_libraries(ggml PUBLIC ${backend_target})
+        message(STATUS "Including ${backend} backend")
+        if (NOT GGML_BACKEND_DL)
             string(TOUPPER "GGML_USE_${backend}" backend_use)
             target_compile_definitions(ggml PUBLIC ${backend_use})
         endif()
     endif()
 endfunction()
 
+function(ggml_add_cpu_backend_variant tag_name)
+    set(GGML_CPU_TAG_NAME ${tag_name})
+    # other: OPENMP LLAMAFILE CPU_HBM
+    foreach (feat NATIVE
+                  AVX AVX2 AVX_VNNI FMA F16C
+                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+                  AMX_TILE AMX_INT8 AMX_BF16)
+        set(GGML_${feat} OFF)
+    endforeach()
+
+    foreach (feat ${ARGN})
+        set(GGML_${feat} ON)
+    endforeach()
+
+    ggml_add_cpu_backend_variant_impl(${tag_name})
+endfunction()
+
 ggml_add_backend(CPU)
-
+
+if (GGML_CPU_ALL_VARIANTS)
+    if (NOT GGML_BACKEND_DL)
+        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+    endif()
+    ggml_add_cpu_backend_variant(sandybridge    AVX)
+    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 FMA)
+    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
+    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+    if (NOT MSVC)
+        # MSVC doesn't support AVX-VNNI or AMX
+        ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
+        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+    endif()
+else ()
+    ggml_add_cpu_backend_variant_impl("")
+endif()
+
 ggml_add_backend(BLAS)
 ggml_add_backend(CANN)
 ggml_add_backend(CUDA)
 ggml_add_backend(HIP)
 ggml_add_backend(Kompute)
 ggml_add_backend(METAL)
+ggml_add_backend(MUSA)
 ggml_add_backend(RPC)
 ggml_add_backend(SYCL)
 ggml_add_backend(Vulkan)
-ggml_add_backend(
+ggml_add_backend(OpenCL)
 
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
-    target_compile_features   (${target} PRIVATE c_std_11) # don't bump
+    target_compile_features   (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
 endforeach()
 
 target_link_libraries(ggml-base PRIVATE Threads::Threads)