whispercpp 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -3
  3. data/README.md +71 -14
  4. data/Rakefile +20 -7
  5. data/ext/.gitignore +4 -6
  6. data/ext/dependencies.rb +36 -24
  7. data/ext/extconf.rb +1 -1
  8. data/ext/options.rb +48 -184
  9. data/ext/ruby_whisper.c +18 -0
  10. data/ext/ruby_whisper_context.c +43 -12
  11. data/ext/ruby_whisper_model.c +1 -1
  12. data/ext/ruby_whisper_params.c +4 -2
  13. data/ext/ruby_whisper_segment.c +81 -4
  14. data/ext/ruby_whisper_transcribe.cpp +13 -7
  15. data/ext/ruby_whisper_vad_params.c +1 -1
  16. data/ext/sources/CMakeLists.txt +5 -1
  17. data/ext/sources/bindings/javascript/package.json +1 -1
  18. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
  19. data/ext/sources/examples/addon.node/addon.cpp +150 -31
  20. data/ext/sources/examples/addon.node/index.js +3 -0
  21. data/ext/sources/examples/addon.node/vad-example.js +132 -0
  22. data/ext/sources/examples/bench/bench.cpp +3 -2
  23. data/ext/sources/examples/cli/cli.cpp +3 -2
  24. data/ext/sources/examples/command/command.cpp +32 -8
  25. data/ext/sources/examples/common-whisper.cpp +14 -7
  26. data/ext/sources/examples/lsp/lsp.cpp +2 -0
  27. data/ext/sources/examples/quantize/quantize.cpp +3 -0
  28. data/ext/sources/examples/server/CMakeLists.txt +3 -0
  29. data/ext/sources/examples/server/server.cpp +169 -22
  30. data/ext/sources/examples/stream/stream.cpp +6 -0
  31. data/ext/sources/examples/talk-llama/CMakeLists.txt +4 -1
  32. data/ext/sources/examples/talk-llama/llama-arch.cpp +171 -3
  33. data/ext/sources/examples/talk-llama/llama-arch.h +28 -1
  34. data/ext/sources/examples/talk-llama/llama-batch.cpp +741 -272
  35. data/ext/sources/examples/talk-llama/llama-batch.h +112 -54
  36. data/ext/sources/examples/talk-llama/llama-chat.cpp +30 -8
  37. data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
  38. data/ext/sources/examples/talk-llama/llama-context.cpp +520 -351
  39. data/ext/sources/examples/talk-llama/llama-context.h +38 -17
  40. data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
  41. data/ext/sources/examples/talk-llama/llama-cparams.h +1 -1
  42. data/ext/sources/examples/talk-llama/llama-graph.cpp +447 -372
  43. data/ext/sources/examples/talk-llama/llama-graph.h +128 -58
  44. data/ext/sources/examples/talk-llama/llama-hparams.cpp +10 -2
  45. data/ext/sources/examples/talk-llama/llama-hparams.h +19 -2
  46. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
  47. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
  48. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
  49. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
  50. data/ext/sources/examples/talk-llama/llama-kv-cache.h +14 -472
  51. data/ext/sources/examples/talk-llama/llama-kv-cells.h +86 -26
  52. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
  53. data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
  54. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
  55. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
  56. data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
  57. data/ext/sources/examples/talk-llama/llama-memory.h +88 -4
  58. data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
  59. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +42 -17
  60. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
  61. data/ext/sources/examples/talk-llama/llama-model.cpp +1863 -563
  62. data/ext/sources/examples/talk-llama/llama-model.h +27 -0
  63. data/ext/sources/examples/talk-llama/llama-quant.cpp +89 -6
  64. data/ext/sources/examples/talk-llama/llama-vocab.cpp +65 -28
  65. data/ext/sources/examples/talk-llama/llama-vocab.h +1 -0
  66. data/ext/sources/examples/talk-llama/llama.cpp +11 -7
  67. data/ext/sources/examples/talk-llama/llama.h +147 -40
  68. data/ext/sources/examples/talk-llama/talk-llama.cpp +2 -0
  69. data/ext/sources/examples/talk-llama/unicode.cpp +5 -0
  70. data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
  71. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +2 -0
  72. data/ext/sources/ggml/CMakeLists.txt +48 -3
  73. data/ext/sources/ggml/cmake/common.cmake +24 -0
  74. data/ext/sources/ggml/include/ggml-backend.h +1 -1
  75. data/ext/sources/ggml/include/ggml-cpu.h +2 -0
  76. data/ext/sources/ggml/include/ggml.h +144 -5
  77. data/ext/sources/ggml/src/CMakeLists.txt +82 -24
  78. data/ext/sources/ggml/src/ggml-backend-reg.cpp +5 -0
  79. data/ext/sources/ggml/src/ggml-backend.cpp +46 -23
  80. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +3 -3
  81. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  82. data/ext/sources/ggml/src/ggml-cann/common.h +6 -1
  83. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  84. data/ext/sources/ggml/src/ggml-common.h +4 -0
  85. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +133 -40
  86. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  87. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  88. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  89. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  90. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  91. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  92. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  93. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  94. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  95. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  96. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  97. data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  98. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  99. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  100. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  101. data/ext/sources/ggml/src/ggml-cpu/common.h +4 -3
  102. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  103. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +146 -105
  104. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  105. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  106. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  107. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  108. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  109. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1057 -174
  110. data/ext/sources/ggml/src/ggml-cpu/ops.h +8 -0
  111. data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
  112. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  113. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
  114. data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
  115. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +330 -38
  116. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  117. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +111 -18
  118. data/ext/sources/ggml/src/ggml-cpu/vec.h +303 -94
  119. data/ext/sources/ggml/src/ggml-cuda/common.cuh +60 -37
  120. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  121. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  122. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  123. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  124. data/ext/sources/ggml/src/ggml-cuda/convert.cu +22 -0
  125. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +5 -0
  126. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +2 -2
  127. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
  128. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  129. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +265 -123
  130. data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
  131. data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
  132. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +257 -87
  133. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +2 -3
  134. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  135. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +5 -18
  136. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  137. data/ext/sources/ggml/src/ggml-cuda/unary.cu +89 -0
  138. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +7 -0
  139. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  140. data/ext/sources/ggml/src/ggml-impl.h +127 -183
  141. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  142. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +27 -0
  143. data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +331 -49
  144. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +564 -282
  145. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
  146. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +14 -0
  147. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1859 -489
  148. data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  149. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  150. data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  151. data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
  152. data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  153. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  154. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  155. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  156. data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  157. data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  158. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  159. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  160. data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  161. data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  162. data/ext/sources/ggml/src/ggml-quants.c +6 -8
  163. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  164. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
  165. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  166. data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
  167. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +28 -41
  168. data/ext/sources/ggml/src/ggml-sycl/conv.cpp +4 -10
  169. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +117 -165
  170. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +192 -53
  171. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  172. data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  173. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  174. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +648 -1039
  175. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  176. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
  177. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +8 -105
  178. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -100
  179. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
  180. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
  181. data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +60 -80
  182. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  183. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +55 -74
  184. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -10
  185. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +138 -27
  186. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
  187. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  188. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  189. data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  190. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  191. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +12 -16
  192. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
  193. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +726 -282
  194. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  195. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  196. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  197. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  198. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  199. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  200. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  201. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  202. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +10 -1
  203. data/ext/sources/ggml/src/ggml.c +328 -48
  204. data/ext/sources/ggml/src/ggml.cpp +26 -0
  205. data/ext/sources/ggml/src/gguf.cpp +24 -3
  206. data/ext/sources/include/whisper.h +2 -0
  207. data/ext/sources/src/CMakeLists.txt +2 -0
  208. data/ext/sources/src/coreml/whisper-compat.h +10 -0
  209. data/ext/sources/src/coreml/whisper-compat.m +35 -0
  210. data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
  211. data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
  212. data/ext/sources/src/whisper.cpp +218 -169
  213. data/extsources.rb +15 -9
  214. data/lib/whisper/context.rb +15 -0
  215. data/lib/whisper/model/uri.rb +56 -1
  216. data/lib/whisper/segment.rb +58 -0
  217. data/sig/whisper.rbs +68 -38
  218. data/{tests → test}/helper.rb +1 -12
  219. data/{tests → test}/test_model.rb +9 -0
  220. data/test/test_package.rb +51 -0
  221. data/test/test_segment.rb +146 -0
  222. data/{tests → test}/test_whisper.rb +70 -0
  223. data/whispercpp.gemspec +2 -3
  224. metadata +91 -43
  225. data/ext/sources/.dockerignore +0 -3
  226. data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
  227. data/ext/sources/ci/run.sh +0 -336
  228. data/ext/sources/close-issue.yml +0 -28
  229. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2739
  230. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  231. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
  232. data/tests/test_package.rb +0 -46
  233. data/tests/test_segment.rb +0 -74
  234. /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  235. /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  236. /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  237. /data/{tests → test}/jfk_reader/.gitignore +0 -0
  238. /data/{tests → test}/jfk_reader/extconf.rb +0 -0
  239. /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
  240. /data/{tests → test}/test_callback.rb +0 -0
  241. /data/{tests → test}/test_error.rb +0 -0
  242. /data/{tests → test}/test_params.rb +0 -0
  243. /data/{tests → test}/test_vad.rb +0 -0
  244. /data/{tests → test}/test_vad_params.rb +0 -0
data/ext/sources/ggml/include/ggml.h

@@ -470,6 +470,7 @@ extern "C" {
  GGML_OP_TRANSPOSE,
  GGML_OP_GET_ROWS,
  GGML_OP_GET_ROWS_BACK,
+ GGML_OP_SET_ROWS,
  GGML_OP_DIAG,
  GGML_OP_DIAG_MASK_INF,
  GGML_OP_DIAG_MASK_ZERO,

@@ -481,6 +482,7 @@ extern "C" {
  GGML_OP_CONV_TRANSPOSE_1D,
  GGML_OP_IM2COL,
  GGML_OP_IM2COL_BACK,
+ GGML_OP_CONV_2D,
  GGML_OP_CONV_2D_DW,
  GGML_OP_CONV_TRANSPOSE_2D,
  GGML_OP_POOL_1D,

@@ -489,6 +491,7 @@ extern "C" {
  GGML_OP_UPSCALE, // nearest interpolate
  GGML_OP_PAD,
  GGML_OP_PAD_REFLECT_1D,
+ GGML_OP_ROLL,
  GGML_OP_ARANGE,
  GGML_OP_TIMESTEP_EMBEDDING,
  GGML_OP_ARGSORT,

@@ -518,6 +521,8 @@ extern "C" {
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
  GGML_OP_OPT_STEP_ADAMW,

+ GGML_OP_GLU,
+
  GGML_OP_COUNT,
  };

@@ -541,6 +546,14 @@ extern "C" {
  GGML_UNARY_OP_COUNT,
  };

+ enum ggml_glu_op {
+ GGML_GLU_OP_REGLU,
+ GGML_GLU_OP_GEGLU,
+ GGML_GLU_OP_SWIGLU,
+
+ GGML_GLU_OP_COUNT,
+ };
+
  enum ggml_object_type {
  GGML_OBJECT_TYPE_TENSOR,
  GGML_OBJECT_TYPE_GRAPH,

@@ -656,6 +669,7 @@ extern "C" {
  GGML_API const char * ggml_op_symbol(enum ggml_op op);

  GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+ GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
  GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name

  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);

@@ -686,6 +700,9 @@ extern "C" {
  // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
  GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);

+ // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
+ GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
+
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

@@ -757,6 +774,7 @@ extern "C" {
  GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);

  GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+ GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);

  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

@@ -935,6 +953,15 @@ extern "C" {
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ // repeat a to the specified shape
+ GGML_API struct ggml_tensor * ggml_repeat_4d(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int64_t ne0,
+ int64_t ne1,
+ int64_t ne2,
+ int64_t ne3);
+
  // sums repetitions in a into shape of b
  GGML_API struct ggml_tensor * ggml_repeat_back(
  struct ggml_context * ctx,
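The new ggml_repeat_4d tiles a tensor to an explicit target shape, whereas ggml_repeat takes a second tensor only to borrow its shape. A minimal sketch based on the declaration above (tensor sizes are illustrative; filling data and computing the graph on a backend are omitted):

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // a single row of width 4 that we want repeated across 8 rows and 2 batches
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 1);

    // equivalent to ggml_repeat(ctx, a, b) with b of shape [4, 8, 2, 1],
    // but without allocating the shape-carrying tensor b
    struct ggml_tensor * r = ggml_repeat_4d(ctx, a, 4, 8, 2, 1);

    (void) r; // graph build/compute with a backend omitted
    ggml_free(ctx);
    return 0;
}
```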
@@ -1076,6 +1103,63 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ // gated linear unit ops
+ // A: n columns, r rows,
+ // result is n / 2 columns, r rows,
+ // expects gate in second half of row, unless swapped is true
+ GGML_API struct ggml_tensor * ggml_glu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_glu_op op,
+ bool swapped);
+
+ GGML_API struct ggml_tensor * ggml_reglu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_reglu_swapped(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_geglu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_geglu_swapped(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_swiglu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ // A: n columns, r rows,
+ // B: n columns, r rows,
+ GGML_API struct ggml_tensor * ggml_glu_split(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ enum ggml_glu_op op);
+
+ GGML_API struct ggml_tensor * ggml_reglu_split(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ GGML_API struct ggml_tensor * ggml_geglu_split(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ GGML_API struct ggml_tensor * ggml_swiglu_split(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  // normalize along rows
  GGML_API struct ggml_tensor * ggml_norm(
  struct ggml_context * ctx,
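Per the comments above, the fused GLU ops take rows of width n holding [value | gate] (gate first if swapped) and return rows of width n/2, while the *_split variants take value and gate as two separate tensors. A hedged sketch of a SwiGLU feed-forward block built from these declarations; the function and weight names (swiglu_mlp, w_up_gate, w_down) and the shapes are illustrative, not from the package:

```c
#include "ggml.h"

// Illustrative SwiGLU MLP: y = down( swiglu( up_gate(x) ) )
// Assumes the "up" and "gate" projections are packed into one 2*n_ff-wide matmul.
static struct ggml_tensor * swiglu_mlp(
        struct ggml_context * ctx,
        struct ggml_tensor  * x,          // [n_embd, n_tokens]
        struct ggml_tensor  * w_up_gate,  // [n_embd, 2*n_ff]
        struct ggml_tensor  * w_down) {   // [n_ff, n_embd]
    // [2*n_ff, n_tokens]: first half of each row is the value, second half the gate
    struct ggml_tensor * up_gate = ggml_mul_mat(ctx, w_up_gate, x);

    // fused SiLU-gated linear unit -> [n_ff, n_tokens]
    struct ggml_tensor * gated = ggml_swiglu(ctx, up_gate);

    // with separate projection weights the split form would be used instead:
    //   ggml_swiglu_split(ctx, ggml_mul_mat(ctx, w_up, x), ggml_mul_mat(ctx, w_gate, x));

    return ggml_mul_mat(ctx, w_down, gated); // [n_embd, n_tokens]
}
```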
@@ -1365,6 +1449,23 @@ extern "C" {
  struct ggml_tensor * b, // row indices
  struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape

+ // a TD [n_embd, ne1, ne2, ne3]
+ // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
+ // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
+ //
+ // undefined behavior if destination rows overlap
+ //
+ // broadcast:
+ // ne2 % ne11 == 0
+ // ne3 % ne12 == 0
+ //
+ // return view(a)
+ GGML_API struct ggml_tensor * ggml_set_rows(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // destination
+ struct ggml_tensor * b, // source
+ struct ggml_tensor * c); // row indices
+
  GGML_API struct ggml_tensor * ggml_diag(
  struct ggml_context * ctx,
  struct ggml_tensor * a);
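ggml_set_rows is the scatter counterpart of ggml_get_rows: it writes the rows of b into a at the I64 indices given by c and returns a view of a. A minimal sketch based only on the declaration and comments above (index values and data filling are omitted):

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // destination: 16 rows of width 8
    struct ggml_tensor * dst  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 16);
    // source: 4 rows of width 8 to scatter into dst
    struct ggml_tensor * src  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    // one index per source row, each in [0, 16)
    struct ggml_tensor * rows = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 4);

    // returns a view of dst with the selected rows overwritten;
    // per the header comment, overlapping destination rows are undefined behavior
    struct ggml_tensor * out = ggml_set_rows(ctx, dst, src, rows);

    (void) out; // graph build/compute with a backend omitted
    ggml_free(ctx);
    return 0;
}
```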
@@ -1713,6 +1814,17 @@ extern "C" {
  struct ggml_tensor * b,
  int stride);

+ GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
+ struct ggml_tensor * b, // input data [W, H, C, N]
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1); // dilation dimension 1
+
  enum ggml_op_pool {
  GGML_OP_POOL_MAX,
  GGML_OP_POOL_AVG,
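ggml_conv_2d_direct takes the kernel in [KW, KH, IC, OC] layout and the input in [W, H, C, N], with per-axis stride, padding, and dilation. A hedged sketch of a 3x3, stride-1, pad-1 convolution; the sizes are illustrative:

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = { 64*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // kernel: 3x3, 16 input channels, 32 output channels -> [KW, KH, IC, OC]
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 16, 32);
    // input: 64x64 image, 16 channels, batch of 1 -> [W, H, C, N]
    struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);

    // stride 1, padding 1, dilation 1 keeps the 64x64 spatial size -> [64, 64, 32, 1]
    struct ggml_tensor * out = ggml_conv_2d_direct(ctx, kernel, input,
                                                   /*s0*/ 1, /*s1*/ 1,
                                                   /*p0*/ 1, /*p1*/ 1,
                                                   /*d0*/ 1, /*d1*/ 1);

    (void) out; // graph build/compute with a backend omitted
    ggml_free(ctx);
    return 0;
}
```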
@@ -1755,6 +1867,12 @@ extern "C" {
  enum ggml_scale_mode {
  GGML_SCALE_MODE_NEAREST = 0,
  GGML_SCALE_MODE_BILINEAR = 1,
+
+ GGML_SCALE_MODE_COUNT
+ };
+
+ enum ggml_scale_flag {
+ GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
  };

  // interpolate

@@ -1767,14 +1885,26 @@ extern "C" {

  // interpolate
  // interpolate scale to specified dimensions
- GGML_API struct ggml_tensor * ggml_upscale_ext(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  int ne0,
  int ne1,
  int ne2,
  int ne3,
- enum ggml_scale_mode mode);
+ enum ggml_scale_mode mode),
+ "use ggml_interpolate instead");
+
+ // Up- or downsamples the input to the specified size.
+ // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+ GGML_API struct ggml_tensor * ggml_interpolate(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int64_t ne0,
+ int64_t ne1,
+ int64_t ne2,
+ int64_t ne3,
+ uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]

  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
  GGML_API struct ggml_tensor * ggml_pad(
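ggml_upscale_ext is now deprecated in favor of ggml_interpolate, which takes the absolute target size in all four dimensions and packs the scale mode, plus optional flags such as GGML_SCALE_FLAG_ALIGN_CORNERS, into a single uint32_t. A hedged migration sketch (the helper name and target sizes are illustrative):

```c
#include "ggml.h"

// Old (now deprecated):
//   ggml_upscale_ext(ctx, a, 128, 128, 3, 1, GGML_SCALE_MODE_BILINEAR);
// New equivalent, optionally with align-corners sampling:
static struct ggml_tensor * resize_bilinear(struct ggml_context * ctx,
                                            struct ggml_tensor  * a) {
    return ggml_interpolate(ctx, a,
                            /*ne0*/ 128, /*ne1*/ 128, /*ne2*/ 3, /*ne3*/ 1,
                            GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);
}
```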
@@ -1792,6 +1922,17 @@ extern "C" {
  int p0,
  int p1);

+ // Move tensor elements by an offset given for each dimension. Elements that
+ // are shifted beyond the last position are wrapped around to the beginning.
+ GGML_API struct ggml_tensor * ggml_roll(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int shift0,
+ int shift1,
+ int shift2,
+ int shift3);
+
+
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
  // timesteps: [N,]
  // return: [N, dim]
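ggml_roll applies a circular shift per axis: elements pushed past the end of a dimension reappear at its start. A minimal sketch using only the declaration above (sizes are illustrative):

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 32);

    // shift by +1 along dimension 1; the last row wraps around to position 0,
    // the other dimensions are left untouched
    struct ggml_tensor * rolled = ggml_roll(ctx, a, 0, 1, 0, 0);

    (void) rolled; // graph build/compute with a backend omitted
    ggml_free(ctx);
    return 0;
}
```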
@@ -2086,9 +2227,6 @@ extern "C" {
  GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
  GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);

- GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
- GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
  // print info and performance information for the graph
  GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

@@ -2172,6 +2310,7 @@ extern "C" {

  // scheduling priorities
  enum ggml_sched_priority {
+ GGML_SCHED_PRIO_LOW = -1,
  GGML_SCHED_PRIO_NORMAL,
  GGML_SCHED_PRIO_MEDIUM,
  GGML_SCHED_PRIO_HIGH,
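GGML_SCHED_PRIO_LOW adds a below-normal scheduling priority ahead of the existing levels. As a hedged sketch only: assuming the threadpool-params API declared in ggml-cpu.h (ggml_threadpool_params_default and its prio field are not part of the hunk above), it could be used to keep background work from competing with interactive threads:

```c
#include "ggml-cpu.h"   // assumed location of the threadpool params API

int main(void) {
    // assumed API: default params for 4 worker threads, then lower their priority
    struct ggml_threadpool_params tpp = ggml_threadpool_params_default(4);
    tpp.prio = GGML_SCHED_PRIO_LOW; // run worker threads below normal priority

    // a threadpool created from tpp would then be passed to the CPU compute call
    (void) tpp;
    return 0;
}
```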
data/ext/sources/ggml/src/CMakeLists.txt

@@ -109,6 +109,8 @@ if (MSVC)
  else ()
  set(CMAKE_GENERATOR_PLATFORM_LWR "")
  endif ()
+ ggml_get_system_arch()
+ message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")

  if (NOT MSVC)
  if (GGML_STATIC)

@@ -123,7 +125,6 @@ if (NOT MSVC)
  endif()

  if (MINGW)
- # Target Windows 8 for PrefetchVirtualMemory
  add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
  endif()

@@ -194,6 +195,7 @@ add_library(ggml-base
  ../include/ggml-opt.h
  ../include/gguf.h
  ggml.c
+ ggml.cpp
  ggml-alloc.c
  ggml-backend.cpp
  ggml-opt.cpp

@@ -210,6 +212,7 @@ endif()

  add_library(ggml
  ggml-backend-reg.cpp)
+ add_library(ggml::ggml ALIAS ggml)

  target_link_libraries(ggml PUBLIC ggml-base)

@@ -224,8 +227,8 @@ function(ggml_add_backend_library backend)
  set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
  target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
  add_dependencies(ggml ${backend})
- install(TARGETS ${backend} LIBRARY DESTINATION bin)
- else()
+ install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
+ else()
  add_library(${backend} ${ARGN})
  target_link_libraries(ggml PUBLIC ${backend})
  install(TARGETS ${backend} LIBRARY)

@@ -267,17 +270,27 @@ endfunction()
  function(ggml_add_cpu_backend_variant tag_name)
  set(GGML_CPU_TAG_NAME ${tag_name})
  # other: OPENMP LLAMAFILE CPU_HBM
- foreach (feat NATIVE
- SSE42
- AVX AVX2 BMI2 AVX_VNNI FMA F16C
- AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
- AMX_TILE AMX_INT8 AMX_BF16)
- set(GGML_${feat} OFF)
- endforeach()
-
- foreach (feat ${ARGN})
- set(GGML_${feat} ON)
- endforeach()
+ if (GGML_SYSTEM_ARCH STREQUAL "x86")
+ foreach (feat NATIVE
+ SSE42
+ AVX AVX2 BMI2 AVX_VNNI FMA F16C
+ AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+ AMX_TILE AMX_INT8 AMX_BF16)
+ set(GGML_${feat} OFF)
+ endforeach()
+
+ foreach (feat ${ARGN})
+ set(GGML_${feat} ON)
+ endforeach()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
+ foreach (feat ${ARGN})
+ set(GGML_INTERNAL_${feat} ON)
+ endforeach()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+ foreach (feat ${ARGN})
+ set(GGML_INTERNAL_${feat} ON)
+ endforeach()
+ endif()

  ggml_add_cpu_backend_variant_impl(${tag_name})
  endfunction()

@@ -287,17 +300,62 @@ ggml_add_backend(CPU)
  if (GGML_CPU_ALL_VARIANTS)
  if (NOT GGML_BACKEND_DL)
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+ elseif (GGML_CPU_ARM_ARCH)
+ message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
  endif()
- ggml_add_cpu_backend_variant(x64)
- ggml_add_cpu_backend_variant(sse42 SSE42)
- ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
- ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
- ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
- ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
- ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
- if (NOT MSVC)
- # MSVC doesn't support AMX
- ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+ if (GGML_SYSTEM_ARCH STREQUAL "x86")
+ ggml_add_cpu_backend_variant(x64)
+ ggml_add_cpu_backend_variant(sse42 SSE42)
+ ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
+ ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
+ ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
+ ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+ ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
+ if (NOT MSVC)
+ # MSVC doesn't support AMX
+ ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+ endif()
+ elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ # Many of these features are optional so we build versions with popular
+ # combinations and name the backends based on the version they were
+ # first released with
+ ggml_add_cpu_backend_variant(armv8.0_1)
+ ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
+ ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+ ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
+ ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
+ ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
+ ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
+ ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
+ elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
+ # Android-specific backends with SoC-compatible feature sets
+ ggml_add_cpu_backend_variant(android_armv8.0_1)
+ ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
+ ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+ ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+ elseif (APPLE)
+ ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
+ ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
+ ggml_add_cpu_backend_variant(apple_m4 DOTPROD MATMUL_INT8 NOSVE SME)
+ else()
+ message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
+ endif()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ ggml_add_cpu_backend_variant(power0)
+ ggml_add_cpu_backend_variant(power7_1 POWER7)
+ ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+ ggml_add_cpu_backend_variant(power8_1 POWER8)
+ ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+ ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+ ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+ ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+ else()
+ message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+ endif()
+ else()
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
  endif()
  elseif (GGML_CPU)
  ggml_add_cpu_backend_variant_impl("")
data/ext/sources/ggml/src/ggml-backend-reg.cpp

@@ -69,6 +69,9 @@
  #if defined(__clang__)
  # pragma clang diagnostic push
  # pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ #elif defined(__GNUC__)
+ # pragma GCC diagnostic push
+ # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  #endif

  namespace fs = std::filesystem;

@@ -91,6 +94,8 @@ static std::string path_str(const fs::path & path) {

  #if defined(__clang__)
  # pragma clang diagnostic pop
+ #elif defined(__GNUC__)
+ # pragma GCC diagnostic pop
  #endif

  #ifdef _WIN32
data/ext/sources/ggml/src/ggml-backend.cpp

@@ -817,8 +817,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
  }
  if (sched->debug > 1) {
  ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
- GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
- fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
+ GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
+ fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
+ graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
  for (int j = 0; j < GGML_MAX_SRC; j++) {
  struct ggml_tensor * src = node->src[j];
  if (src == NULL) {

@@ -1340,7 +1341,10 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
  // allocate graph
  if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
  // the re-allocation may cause the split inputs to be moved to a different address
- ggml_backend_sched_synchronize(sched);
+ // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
+ for (int i = 0; i < sched->n_backends; i++) {
+ ggml_backend_synchronize(sched->backends[i]);
+ }
  #ifndef NDEBUG
  GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
  #endif

@@ -1564,7 +1568,6 @@ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgra

  ggml_backend_sched_split_graph(sched, graph);

-
  if (!ggml_backend_sched_alloc_splits(sched)) {
  return false;
  }

@@ -1598,9 +1601,12 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
  for (int i = 0; i < sched->n_backends; i++) {
  ggml_backend_synchronize(sched->backends[i]);
  }
- // reset the current copy to 0 so that the graphs will be similar during generation
- // necessary for CUDA graphs
- sched->cur_copy = 0;
+ if (!sched->is_alloc) {
+ // if the graph is not already allocated, always use copy 0 after a synchronization
+ // this ensures that during generation the same copy is used every time,
+ // which avoids changes in the graph that could cause CUDA or other graphs to be disabled
+ sched->cur_copy = 0;
+ }
  }

  void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {

@@ -1821,7 +1827,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
  ggml_free(copy.ctx_unallocated);
  }

- bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
+ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
  struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
  if (copy.buffer == NULL) {
  return false;

@@ -1832,28 +1838,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t

  assert(g1->n_nodes == g2->n_nodes);

- for (int i = 0; i < g1->n_nodes; i++) {
- struct ggml_tensor * t1 = g1->nodes[i];
- struct ggml_tensor * t2 = g2->nodes[i];
+ if (test_node != nullptr) {
+ // Compute the whole graph and only test the output for a specific tensor
+ ggml_backend_graph_compute(backend1, g1);
+ ggml_backend_graph_compute(backend2, g2);

- assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
+ int test_node_idx = -1;
+ for (int i = 0; i < g1->n_nodes; i++) {
+ struct ggml_tensor * t1 = g1->nodes[i];
+ if (t1 == test_node) {
+ test_node_idx = i;
+ break;
+ }
+ }
+ GGML_ASSERT(test_node_idx != -1);

- struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
- struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
+ callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+ } else {
+ for (int i = 0; i < g1->n_nodes; i++) {
+ struct ggml_tensor * t1 = g1->nodes[i];
+ struct ggml_tensor * t2 = g2->nodes[i];

- ggml_backend_graph_compute(backend1, &g1v);
- ggml_backend_graph_compute(backend2, &g2v);
+ assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));

- if (ggml_is_view_op(t1->op)) {
- continue;
- }
+ struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
+ struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);

- // compare results, calculate rms etc
- if (!callback(i, t1, t2, user_data)) {
- break;
+ ggml_backend_graph_compute(backend1, &g1v);
+ ggml_backend_graph_compute(backend2, &g2v);
+
+ if (ggml_is_view_op(t1->op)) {
+ continue;
+ }
+
+ // compare results, calculate rms etc
+ if (!callback(i, t1, t2, user_data)) {
+ break;
+ }
  }
  }
-
  ggml_backend_graph_copy_free(copy);

  return true;
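ggml_backend_compare_graph_backend gains a test_node parameter: when it is non-NULL, both graphs are computed in full and the callback is invoked only for that tensor, instead of computing and comparing node by node. A hedged sketch of a caller; the helper name compare_logits_only and the surrounding setup are illustrative, only the new parameter usage comes from the hunk above:

```c
#include "ggml-backend.h"

// Callback matching ggml_backend_eval_callback: return false to stop the comparison.
static bool check_node(int i, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data) {
    (void) i; (void) user_data;
    // compare t1 (backend 1) against t2 (backend 2) here, e.g. RMS of the difference
    return t1 != NULL && t2 != NULL;
}

// Hypothetical helper: b1/b2, graph and logits are created elsewhere.
bool compare_logits_only(ggml_backend_t b1, ggml_backend_t b2,
                         struct ggml_cgraph * graph, struct ggml_tensor * logits) {
    // passing NULL as test_node keeps the old behavior (compare every node one by one);
    // passing `logits` computes both graphs once and checks just that tensor
    return ggml_backend_compare_graph_backend(b1, b2, graph, check_node, NULL, logits);
}
```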
data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt

@@ -81,7 +81,7 @@ if (BLAS_FOUND)
  target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
  target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
  else()
- message(ERROR "BLAS not found, please refer to "
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
- " to set correct GGML_BLAS_VENDOR")
+ message(FATAL_ERROR "BLAS not found, please refer to "
+ "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+ " to set correct GGML_BLAS_VENDOR")
  endif()
data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt

@@ -30,6 +30,7 @@ string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
  string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
  set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
  string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
+ message(STATUS "CANN: SOC_VERSION = ${SOC_VERSION}")

  if (CANN_INSTALL_DIR)
  # Only Support Linux.
data/ext/sources/ggml/src/ggml-cann/common.h

@@ -37,6 +37,7 @@
  #include <thread>
  #include <unistd.h>
  #include <functional>
+ #include <optional>

  #include "../include/ggml-cann.h"
  #include "../include/ggml.h"

@@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
  void ggml_cann_set_device(int32_t device);
  int32_t ggml_cann_get_device();

+ std::optional<std::string> get_env(const std::string& name);
+ bool parse_bool(const std::string& value);
+
  /**
  * @brief Abstract base class for memory pools used by CANN.
  */

@@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
  : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
  ggml_cann_set_device(device);
  description = aclrtGetSocName();
- async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
+
+ async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
  GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
  device, async_mode ? "ON" : "OFF");
  }