@fugood/llama.node 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +29 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +17 -1
- package/src/LlamaContext.cpp +86 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
|
@@ -6,7 +6,20 @@
|
|
|
6
6
|
typedef uint16_t ggml_half;
|
|
7
7
|
typedef uint32_t ggml_half2;
|
|
8
8
|
|
|
9
|
-
#define GGML_COMMON_AGGR
|
|
9
|
+
#define GGML_COMMON_AGGR_U
|
|
10
|
+
#define GGML_COMMON_AGGR_S
|
|
11
|
+
|
|
12
|
+
#define GGML_COMMON_DECL
|
|
13
|
+
#elif defined(GGML_COMMON_DECL_CPP)
|
|
14
|
+
#include <cstdint>
|
|
15
|
+
|
|
16
|
+
typedef uint16_t ggml_half;
|
|
17
|
+
typedef uint32_t ggml_half2;
|
|
18
|
+
|
|
19
|
+
// std-c++ allow anonymous unions but some compiler warn on it
|
|
20
|
+
#define GGML_COMMON_AGGR_U data
|
|
21
|
+
// std-c++ do not allow it.
|
|
22
|
+
#define GGML_COMMON_AGGR_S data
|
|
10
23
|
|
|
11
24
|
#define GGML_COMMON_DECL
|
|
12
25
|
#elif defined(GGML_COMMON_DECL_METAL)
|
|
@@ -15,7 +28,8 @@ typedef uint32_t ggml_half2;
|
|
|
15
28
|
typedef half ggml_half;
|
|
16
29
|
typedef half2 ggml_half2;
|
|
17
30
|
|
|
18
|
-
#define GGML_COMMON_AGGR
|
|
31
|
+
#define GGML_COMMON_AGGR_U
|
|
32
|
+
#define GGML_COMMON_AGGR_S
|
|
19
33
|
|
|
20
34
|
#define GGML_COMMON_DECL
|
|
21
35
|
#elif defined(GGML_COMMON_DECL_CUDA)
|
|
@@ -29,7 +43,8 @@ typedef half2 ggml_half2;
|
|
|
29
43
|
typedef half ggml_half;
|
|
30
44
|
typedef half2 ggml_half2;
|
|
31
45
|
|
|
32
|
-
#define GGML_COMMON_AGGR data
|
|
46
|
+
#define GGML_COMMON_AGGR_U
|
|
47
|
+
#define GGML_COMMON_AGGR_S data
|
|
33
48
|
|
|
34
49
|
#define GGML_COMMON_DECL
|
|
35
50
|
#elif defined(GGML_COMMON_DECL_HIP)
|
|
@@ -39,7 +54,8 @@ typedef half2 ggml_half2;
|
|
|
39
54
|
typedef half ggml_half;
|
|
40
55
|
typedef half2 ggml_half2;
|
|
41
56
|
|
|
42
|
-
#define GGML_COMMON_AGGR data
|
|
57
|
+
#define GGML_COMMON_AGGR_U
|
|
58
|
+
#define GGML_COMMON_AGGR_S data
|
|
43
59
|
|
|
44
60
|
#define GGML_COMMON_DECL
|
|
45
61
|
#elif defined(GGML_COMMON_DECL_SYCL)
|
|
@@ -49,7 +65,8 @@ typedef half2 ggml_half2;
|
|
|
49
65
|
typedef sycl::half ggml_half;
|
|
50
66
|
typedef sycl::half2 ggml_half2;
|
|
51
67
|
|
|
52
|
-
#define GGML_COMMON_AGGR data
|
|
68
|
+
#define GGML_COMMON_AGGR_U
|
|
69
|
+
#define GGML_COMMON_AGGR_S data
|
|
53
70
|
|
|
54
71
|
#define GGML_COMMON_DECL
|
|
55
72
|
#endif
|
|
@@ -154,9 +171,9 @@ typedef struct {
|
|
|
154
171
|
struct {
|
|
155
172
|
ggml_half d; // delta
|
|
156
173
|
ggml_half m; // min
|
|
157
|
-
} GGML_COMMON_AGGR;
|
|
174
|
+
} GGML_COMMON_AGGR_S;
|
|
158
175
|
ggml_half2 dm;
|
|
159
|
-
};
|
|
176
|
+
} GGML_COMMON_AGGR_U;
|
|
160
177
|
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
|
161
178
|
} block_q4_1;
|
|
162
179
|
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
|
@@ -175,9 +192,9 @@ typedef struct {
|
|
|
175
192
|
struct {
|
|
176
193
|
ggml_half d; // delta
|
|
177
194
|
ggml_half m; // min
|
|
178
|
-
} GGML_COMMON_AGGR;
|
|
195
|
+
} GGML_COMMON_AGGR_S;
|
|
179
196
|
ggml_half2 dm;
|
|
180
|
-
};
|
|
197
|
+
} GGML_COMMON_AGGR_U;
|
|
181
198
|
uint8_t qh[4]; // 5-th bit of quants
|
|
182
199
|
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
|
183
200
|
} block_q5_1;
|
|
@@ -196,37 +213,13 @@ typedef struct {
|
|
|
196
213
|
struct {
|
|
197
214
|
ggml_half d; // delta
|
|
198
215
|
ggml_half s; // d * sum(qs[i])
|
|
199
|
-
} GGML_COMMON_AGGR;
|
|
216
|
+
} GGML_COMMON_AGGR_S;
|
|
200
217
|
ggml_half2 ds;
|
|
201
|
-
};
|
|
218
|
+
} GGML_COMMON_AGGR_U;
|
|
202
219
|
int8_t qs[QK8_1]; // quants
|
|
203
220
|
} block_q8_1;
|
|
204
221
|
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
|
|
205
222
|
|
|
206
|
-
typedef struct {
|
|
207
|
-
ggml_half d[4]; // deltas for 4 q4_0 blocks
|
|
208
|
-
uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
|
|
209
|
-
} block_q4_0x4;
|
|
210
|
-
static_assert(sizeof(block_q4_0x4) == 4 * sizeof(ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
|
|
211
|
-
|
|
212
|
-
typedef struct {
|
|
213
|
-
ggml_half d[8]; // deltas for 8 q4_0 blocks
|
|
214
|
-
uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
|
|
215
|
-
} block_q4_0x8;
|
|
216
|
-
static_assert(sizeof(block_q4_0x8) == 8 * sizeof(ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
|
|
217
|
-
|
|
218
|
-
typedef struct {
|
|
219
|
-
ggml_half d[4]; // deltas for 4 q8_0 blocks
|
|
220
|
-
int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
|
|
221
|
-
} block_q8_0x4;
|
|
222
|
-
static_assert(sizeof(block_q8_0x4) == 4 * sizeof(ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
|
|
223
|
-
|
|
224
|
-
typedef struct {
|
|
225
|
-
ggml_half d[8]; // deltas for 8 q8_0 blocks
|
|
226
|
-
int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
|
|
227
|
-
} block_q8_0x8;
|
|
228
|
-
static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
|
|
229
|
-
|
|
230
223
|
//
|
|
231
224
|
// Ternary quantization
|
|
232
225
|
//
|
|
@@ -261,9 +254,9 @@ typedef struct {
|
|
|
261
254
|
struct {
|
|
262
255
|
ggml_half d; // super-block scale for quantized scales
|
|
263
256
|
ggml_half dmin; // super-block scale for quantized mins
|
|
264
|
-
} GGML_COMMON_AGGR;
|
|
257
|
+
} GGML_COMMON_AGGR_S;
|
|
265
258
|
ggml_half2 dm;
|
|
266
|
-
};
|
|
259
|
+
} GGML_COMMON_AGGR_U;
|
|
267
260
|
} block_q2_K;
|
|
268
261
|
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
|
269
262
|
|
|
@@ -288,9 +281,9 @@ typedef struct {
|
|
|
288
281
|
struct {
|
|
289
282
|
ggml_half d; // super-block scale for quantized scales
|
|
290
283
|
ggml_half dmin; // super-block scale for quantized mins
|
|
291
|
-
} GGML_COMMON_AGGR;
|
|
284
|
+
} GGML_COMMON_AGGR_S;
|
|
292
285
|
ggml_half2 dm;
|
|
293
|
-
};
|
|
286
|
+
} GGML_COMMON_AGGR_U;
|
|
294
287
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
295
288
|
uint8_t qs[QK_K/2]; // 4--bit quants
|
|
296
289
|
} block_q4_K;
|
|
@@ -305,9 +298,9 @@ typedef struct {
|
|
|
305
298
|
struct {
|
|
306
299
|
ggml_half d; // super-block scale for quantized scales
|
|
307
300
|
ggml_half dmin; // super-block scale for quantized mins
|
|
308
|
-
} GGML_COMMON_AGGR;
|
|
301
|
+
} GGML_COMMON_AGGR_S;
|
|
309
302
|
ggml_half2 dm;
|
|
310
|
-
};
|
|
303
|
+
} GGML_COMMON_AGGR_U;
|
|
311
304
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
312
305
|
uint8_t qh[QK_K/8]; // quants, high bit
|
|
313
306
|
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
|
@@ -431,6 +424,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
|
|
|
431
424
|
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
|
432
425
|
#define GGML_TABLE_END() };
|
|
433
426
|
|
|
427
|
+
#define GGML_COMMON_IMPL
|
|
428
|
+
#elif defined(GGML_COMMON_IMPL_CPP)
|
|
429
|
+
#include <cstdint>
|
|
430
|
+
|
|
431
|
+
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
|
432
|
+
#define GGML_TABLE_END() };
|
|
433
|
+
|
|
434
434
|
#define GGML_COMMON_IMPL
|
|
435
435
|
#elif defined(GGML_COMMON_IMPL_METAL)
|
|
436
436
|
#include <metal_stdlib>
|
|
@@ -473,7 +473,7 @@ GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
|
|
473
473
|
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
|
474
474
|
GGML_TABLE_END()
|
|
475
475
|
|
|
476
|
-
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
|
476
|
+
//#if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
|
|
477
477
|
GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
|
478
478
|
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
|
479
479
|
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|