whispercpp 1.3.2 → 1.3.3
This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +4 -2
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +150 -31
- data/ext/sources/examples/addon.node/index.js +3 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +3 -2
- data/ext/sources/examples/cli/cli.cpp +3 -2
- data/ext/sources/examples/command/command.cpp +32 -8
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +2 -0
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +169 -22
- data/ext/sources/examples/stream/stream.cpp +6 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +4 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +171 -3
- data/ext/sources/examples/talk-llama/llama-arch.h +28 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +741 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +112 -54
- data/ext/sources/examples/talk-llama/llama-chat.cpp +30 -8
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +520 -351
- data/ext/sources/examples/talk-llama/llama-context.h +38 -17
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +447 -372
- data/ext/sources/examples/talk-llama/llama-graph.h +128 -58
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +10 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +19 -2
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +14 -472
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +86 -26
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +88 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +42 -17
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1863 -563
- data/ext/sources/examples/talk-llama/llama-model.h +27 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +89 -6
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +65 -28
- data/ext/sources/examples/talk-llama/llama-vocab.h +1 -0
- data/ext/sources/examples/talk-llama/llama.cpp +11 -7
- data/ext/sources/examples/talk-llama/llama.h +147 -40
- data/ext/sources/examples/talk-llama/talk-llama.cpp +2 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +5 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +2 -0
- data/ext/sources/ggml/CMakeLists.txt +48 -3
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml.h +144 -5
- data/ext/sources/ggml/src/CMakeLists.txt +82 -24
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +5 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +46 -23
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +6 -1
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- data/ext/sources/ggml/src/ggml-common.h +4 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +133 -40
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +4 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +146 -105
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1057 -174
- data/ext/sources/ggml/src/ggml-cpu/ops.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +330 -38
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +111 -18
- data/ext/sources/ggml/src/ggml-cpu/vec.h +303 -94
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +60 -37
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +22 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +2 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +265 -123
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +257 -87
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +2 -3
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +5 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-impl.h +127 -183
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +27 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +331 -49
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +564 -282
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1859 -489
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/sources/ggml/src/ggml-quants.c +6 -8
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +28 -41
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +4 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +117 -165
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +192 -53
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +648 -1039
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +8 -105
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -100
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +60 -80
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +55 -74
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -10
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +138 -27
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +12 -16
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +726 -282
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +10 -1
- data/ext/sources/ggml/src/ggml.c +328 -48
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +24 -3
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +218 -169
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +56 -1
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +68 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +91 -43
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2739
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_params.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c

@@ -3,11 +3,11 @@
 
 #include "ggml-backend-impl.h"
 #include "ggml-backend.h"
-#include "
+#include "traits.h"
 #include "ggml-cpu-impl.h"
 #include "ggml-cpu.h"
 #include "ggml-impl.h"
-#include "
+#include "quants.h"
 #include "ggml-threading.h"
 #include "unary-ops.h"
 #include "binary-ops.h"
@@ -72,15 +72,13 @@
 #define UNUSED GGML_UNUSED
 #define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0)
 
+// precomputed f32 table for f16 (256 KB) (simd-mappings.h)
+float ggml_table_f32_f16[1 << 16];
+
 #if defined(__ARM_ARCH)
 struct ggml_arm_arch_features_type {
-    int has_neon;
-    int has_dotprod;
-    int has_i8mm;
-    int has_sve;
     int sve_cnt;
-
-} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
+} ggml_arm_arch_features = { 0 };
 #endif
 
 
@@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t;
 
 static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = {
+        .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
         .nrows = 1,
@@ -270,7 +269,11 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .from_float = quantize_row_q4_K,
         .vec_dot = ggml_vec_dot_q4_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
         .nrows = 1,
+#endif
     },
     [GGML_TYPE_Q5_K] = {
         .from_float = quantize_row_q5_K,
@@ -555,6 +558,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
 #endif
 }
 
+void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
+    atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
+}
+
+int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
+    return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
+}
+
 #if defined(__gnu_linux__)
 static cpu_set_t ggml_get_numa_affinity(void) {
     cpu_set_t cpuset;
@@ -666,87 +677,15 @@ bool ggml_is_numa(void) {
 
 #if defined(__linux__) && defined(__aarch64__)
 #include <sys/auxv.h>
-#elif defined(__APPLE__)
-#include <sys/sysctl.h>
-#endif
-
-#if !defined(HWCAP2_I8MM)
-#define HWCAP2_I8MM (1 << 13)
-#endif
-
-#if !defined(HWCAP2_SME)
-#define HWCAP2_SME (1 << 23)
 #endif
 
 static void ggml_init_arm_arch_features(void) {
-#if defined(__linux__) && defined(__aarch64__)
-    uint32_t hwcap = getauxval(AT_HWCAP);
-    uint32_t hwcap2 = getauxval(AT_HWCAP2);
-
-    ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
-    ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
-    ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
-    ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
-    ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
-
-#if defined(__ARM_FEATURE_SVE)
+#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
     ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
 #endif
-#elif defined(__APPLE__)
-    int oldp = 0;
-    size_t size = sizeof(oldp);
-    if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
-        oldp = 0;
-    }
-    ggml_arm_arch_features.has_neon = oldp;
-
-    if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
-        oldp = 0;
-    }
-    ggml_arm_arch_features.has_dotprod = oldp;
-
-    if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
-        oldp = 0;
-    }
-    ggml_arm_arch_features.has_i8mm = oldp;
-
-    if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
-        oldp = 0;
-    }
-    ggml_arm_arch_features.has_sme = oldp;
-
-    ggml_arm_arch_features.has_sve = 0;
-    ggml_arm_arch_features.sve_cnt = 0;
-#else
-// Run-time CPU feature detection not implemented for this platform, fallback to compile time
-#if defined(__ARM_NEON)
-    ggml_arm_arch_features.has_neon = 1;
-#else
-    ggml_arm_arch_features.has_neon = 0;
-#endif
-
-#if defined(__ARM_FEATURE_MATMUL_INT8)
-    ggml_arm_arch_features.has_i8mm = 1;
-#else
-    ggml_arm_arch_features.has_i8mm = 0;
-#endif
-
-#if defined(__ARM_FEATURE_SVE)
-    ggml_arm_arch_features.has_sve = 1;
-    ggml_arm_arch_features.sve_cnt = 16;
-#else
-    ggml_arm_arch_features.has_sve = 0;
-    ggml_arm_arch_features.sve_cnt = 0;
-#endif
-
-#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
-    ggml_arm_arch_features.has_sme = 1;
-#else
-    ggml_arm_arch_features.has_sme = 0;
-#endif
-#endif
 }
-
+
+#endif // __ARM_ARCH
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
     GGML_ASSERT(!ggml_get_no_alloc(ctx));
@@ -801,7 +740,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
             {
                 assert(tensor->nb[0] == sizeof(ggml_fp16_t));
                 for (int i = 0; i < n; i++) {
-                    ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
+                    ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
                 }
             } break;
         case GGML_TYPE_BF16:
@@ -860,7 +799,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
            {
                assert(tensor->nb[0] == sizeof(ggml_fp16_t));
                for (int i = 0; i < n; i++) {
-                   ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
+                   ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
                }
            } break;
        case GGML_TYPE_BF16:
@@ -911,7 +850,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
        case GGML_TYPE_F16:
            {
                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
-               return
+               return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
            }
        case GGML_TYPE_BF16:
            {
@@ -956,7 +895,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
        case GGML_TYPE_F16:
            {
                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
-               ((ggml_fp16_t *)(tensor->data))[i] =
+               ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
            } break;
        case GGML_TYPE_BF16:
            {
@@ -985,7 +924,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
        case GGML_TYPE_I32:
            return ((int32_t *) data)[0];
        case GGML_TYPE_F16:
-           return
+           return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
        case GGML_TYPE_BF16:
            return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
        case GGML_TYPE_F32:
@@ -1012,7 +951,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
            } break;
        case GGML_TYPE_F16:
            {
-               ((ggml_fp16_t *)(data))[0] =
+               ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
            } break;
        case GGML_TYPE_BF16:
            {
@@ -1050,7 +989,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
            }
        case GGML_TYPE_F16:
            {
-               return
+               return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
            }
        case GGML_TYPE_BF16:
            {
@@ -1089,7 +1028,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
            } break;
        case GGML_TYPE_F16:
            {
-               ((ggml_fp16_t *)(tensor->data))[i] =
+               ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
            } break;
        case GGML_TYPE_BF16:
            {
@@ -1116,7 +1055,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
        case GGML_TYPE_I32:
            return ((int32_t *) data)[0];
        case GGML_TYPE_F16:
-           return
+           return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
        case GGML_TYPE_BF16:
            return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
        case GGML_TYPE_F32:
@@ -1143,7 +1082,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
            } break;
        case GGML_TYPE_F16:
            {
-               ((ggml_fp16_t *)(data))[0] =
+               ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
            } break;
        case GGML_TYPE_BF16:
            {
@@ -1254,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     }
 }
 
-
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
 
@@ -1879,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
            {
                ggml_compute_forward_get_rows_back(params, tensor);
            } break;
+       case GGML_OP_SET_ROWS:
+           {
+               ggml_compute_forward_set_rows(params, tensor);
+           } break;
        case GGML_OP_DIAG:
            {
                ggml_compute_forward_diag(params, tensor);
@@ -1923,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
            {
                ggml_compute_forward_im2col_back_f32(params, tensor);
            } break;
+       case GGML_OP_CONV_2D:
+           {
+               ggml_compute_forward_conv_2d(params, tensor);
+           } break;
        case GGML_OP_CONV_2D_DW:
            {
                ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -1955,6 +1902,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
            {
                ggml_compute_forward_pad_reflect_1d(params, tensor);
            } break;
+       case GGML_OP_ROLL:
+           {
+               ggml_compute_forward_roll(params, tensor);
+           } break;
        case GGML_OP_ARANGE:
            {
                ggml_compute_forward_arange(params, tensor);
@@ -2002,6 +1953,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
            {
                ggml_compute_forward_unary(params, tensor);
            } break;
+       case GGML_OP_GLU:
+           {
+               ggml_compute_forward_glu(params, tensor);
+           } break;
        case GGML_OP_GET_REL_POS:
            {
                ggml_compute_forward_get_rel_pos(params, tensor);
@@ -2212,6 +2167,18 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                    GGML_ABORT("fatal error");
            }
            break;
+       case GGML_OP_GLU:
+           switch (ggml_get_glu_op(node)) {
+               case GGML_GLU_OP_REGLU:
+               case GGML_GLU_OP_GEGLU:
+               case GGML_GLU_OP_SWIGLU:
+                   {
+                       n_tasks = n_threads;
+                   } break;
+               default:
+                   GGML_ABORT("fatal error");
+           }
+           break;
        case GGML_OP_SILU_BACK:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
@@ -2228,6 +2195,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                n_tasks = n_threads;
            } break;
        case GGML_OP_GET_ROWS:
+       case GGML_OP_SET_ROWS:
            {
                // FIXME: get_rows can use additional threads, but the cost of launching additional threads
                // decreases performance with GPU offloading
@@ -2264,6 +2232,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
            } break;
        case GGML_OP_IM2COL:
        case GGML_OP_IM2COL_BACK:
+       case GGML_OP_CONV_2D:
        case GGML_OP_CONV_2D_DW:
        case GGML_OP_CONV_TRANSPOSE_1D:
        case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2279,6 +2248,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
        case GGML_OP_UPSCALE:
        case GGML_OP_PAD:
        case GGML_OP_PAD_REFLECT_1D:
+       case GGML_OP_ROLL:
        case GGML_OP_ARANGE:
        case GGML_OP_TIMESTEP_EMBEDDING:
        case GGML_OP_ARGSORT:
@@ -2414,12 +2384,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
    // This is up to the applications.
    DWORD p = THREAD_PRIORITY_NORMAL;
    switch (prio) {
+       case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
        case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
        case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
        case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
        case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
    }
 
+   if (prio != GGML_SCHED_PRIO_LOW) {
+       // Tell Windows that this thread should not be throttled (needs its own CPU core).
+       // Newer Windows 11 versions aggresively park (offline) CPU cores and often place
+       // all our threads onto the first 4 cores which results in terrible performance with
+       // n_threads > 4
+#if _WIN32_WINNT >= 0x0602
+       THREAD_POWER_THROTTLING_STATE t;
+       ZeroMemory(&t, sizeof(t));
+       t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
+       t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
+       t.StateMask = 0;
+
+       if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
+           GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
+           return false;
+       }
+#endif
+   }
+
    if (prio == GGML_SCHED_PRIO_NORMAL) {
        // Keep inherited policy/priority
        return true;
@@ -2447,6 +2437,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
    struct sched_param p;
    int32_t policy = SCHED_OTHER;
    switch (prio) {
+       // TODO: there seems to be no way to set lower prio on Apple platforms
+       case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
        case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
        case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
        case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2503,6 +2495,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
    struct sched_param p;
    int32_t policy = SCHED_OTHER;
    switch (prio) {
+       case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
        case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
        case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
        case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2758,6 +2751,10 @@ struct ggml_cplan ggml_graph_plan(
                        GGML_ABORT("fatal error");
                }
            } break;
+       case GGML_OP_CONV_2D:
+           {
+               cur = GGML_IM2COL_WORK_SIZE;
+           } break;
        case GGML_OP_CONV_TRANSPOSE_2D:
            {
                const int64_t ne00 = node->src[0]->ne[0]; // W
@@ -3158,6 +3155,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
    return ggml_graph_compute(cgraph, &cplan);
 }
 
+void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
+    memcpy(y, x, n * sizeof(float));
+}
+
 void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
     int64_t i = 0;
 #if defined(__F16C__)
@@ -3178,9 +3179,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
        __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
        _mm_storel_epi64((__m128i *)(y + i), y_vec);
    }
+#elif defined(__NNPA__)
+   for (; i + 7 < n; i += 8) {
+       float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
+       float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
+       uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
+       uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
+       vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
+   }
+   for (; i + 3 < n; i += 4) {
+       float32x4_t v_x = vec_xl(0, (const float *)(x + i));
+       float32x4_t v_zero = vec_splats(0.0f);
+       uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
+       uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
+       vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
+   }
 #endif
    for (; i < n; ++i) {
-       y[i] =
+       y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
    }
 }
 
@@ -3204,9 +3220,25 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
        __m128 y_vec = _mm_cvtph_ps(x_vec);
        _mm_storeu_ps(y + i, y_vec);
    }
+#elif defined(__NNPA__)
+   for (; i + 7 < n; i += 8) {
+       uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
+       uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
+       float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
+       float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0);
+       vec_xst(v_yh, 0, (float *)(y + i + 0));
+       vec_xst(v_yl, 0, (float *)(y + i + 4));
+   }
+   for (; i + 3 < n; i += 4) {
+       uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
+       uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
+       float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
+       vec_xst(v_yh, 0, (float *)(y + i));
+   }
 #endif
+
    for (; i < n; ++i) {
-       y[i] =
+       y[i] = GGML_CPU_FP16_TO_FP32(x[i]);
    }
 }
 
@@ -3406,9 +3438,17 @@ int ggml_cpu_has_vxe(void) {
 #endif
 }
 
+int ggml_cpu_has_nnpa(void) {
+#if defined(GGML_NNPA)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_neon(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_NEON)
-    return
+    return 1;
 #else
     return 0;
 #endif
@@ -3416,7 +3456,7 @@ int ggml_cpu_has_neon(void) {
 
 int ggml_cpu_has_dotprod(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
-    return
+    return 1;
 #else
     return 0;
 #endif
@@ -3424,7 +3464,7 @@ int ggml_cpu_has_dotprod(void) {
 
 int ggml_cpu_has_sve(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
-    return
+    return 1;
 #else
     return 0;
 #endif
@@ -3432,7 +3472,7 @@ int ggml_cpu_has_sve(void) {
 
 int ggml_cpu_has_matmul_int8(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
-    return
+    return 1;
 #else
     return 0;
 #endif
@@ -3448,14 +3488,14 @@ int ggml_cpu_get_sve_cnt(void) {
 
 int ggml_cpu_has_sme(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
-    return
+    return 1;
 #else
     return 0;
 #endif
 }
 
 void ggml_cpu_init(void) {
-    // needed to initialize
+    // needed to initialize ggml_time
    {
        struct ggml_init_params params = { 0, NULL, false };
        struct ggml_context * ctx = ggml_init(params);
@@ -3476,9 +3516,10 @@ void ggml_cpu_init(void) {
                uint16_t u16;
                ggml_fp16_t fp16;
            } u = {i};
-           float f =
-
-
+           float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
+           ggml_table_f32_f16[i] = f;
+           ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
+           ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
        }
 
        const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp

@@ -1,8 +1,8 @@
 #include "ggml-backend.h"
 #include "ggml-backend-impl.h"
 #include "ggml-cpu.h"
-#include "
-#include "
+#include "repack.h"
+#include "traits.h"
 #include "ggml-impl.h"
 #include "amx/amx.h"
 
@@ -11,7 +11,7 @@
 #include <vector>
 
 #ifdef GGML_USE_CPU_HBM
-# include "
+# include "hbm.h"
 #endif
 
 #ifdef GGML_USE_CPU_KLEIDIAI
@@ -51,9 +51,9 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
     }
 #endif
 
-#ifdef
-    if (
-        bufts.push_back(
+#ifdef GGML_USE_CPU_REPACK
+    if (ggml_backend_cpu_repack_buffer_type()) {
+        bufts.push_back(ggml_backend_cpu_repack_buffer_type());
     }
 #endif
 
@@ -416,6 +416,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
 
     switch (op->op) {
         case GGML_OP_CPY:
+        case GGML_OP_SET_ROWS:
            return
                op->type != GGML_TYPE_IQ3_XXS &&
                op->type != GGML_TYPE_IQ3_S &&
@@ -578,6 +579,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
        if (ggml_cpu_has_vxe()) {
            features.push_back({ "VXE", "1" });
        }
+       if (ggml_cpu_has_nnpa()) {
+           features.push_back({ "NNPA", "1" });
+       }
        if (ggml_cpu_has_wasm_simd()) {
            features.push_back({ "WASM_SIMD", "1" });
        }
@@ -596,8 +600,8 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
 #ifdef GGML_USE_CPU_KLEIDIAI
        features.push_back({ "KLEIDIAI", "1" });
 #endif
-#ifdef
-       features.push_back({ "
+#ifdef GGML_USE_CPU_REPACK
+       features.push_back({ "REPACK", "1" });
 #endif
 
        features.push_back({ nullptr, nullptr });
|