whispercpp 1.3.2 → 1.3.3
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +4 -2
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +150 -31
- data/ext/sources/examples/addon.node/index.js +3 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +3 -2
- data/ext/sources/examples/cli/cli.cpp +3 -2
- data/ext/sources/examples/command/command.cpp +32 -8
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +2 -0
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +169 -22
- data/ext/sources/examples/stream/stream.cpp +6 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +4 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +171 -3
- data/ext/sources/examples/talk-llama/llama-arch.h +28 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +741 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +112 -54
- data/ext/sources/examples/talk-llama/llama-chat.cpp +30 -8
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +520 -351
- data/ext/sources/examples/talk-llama/llama-context.h +38 -17
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +447 -372
- data/ext/sources/examples/talk-llama/llama-graph.h +128 -58
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +10 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +19 -2
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +14 -472
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +86 -26
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +88 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +42 -17
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1863 -563
- data/ext/sources/examples/talk-llama/llama-model.h +27 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +89 -6
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +65 -28
- data/ext/sources/examples/talk-llama/llama-vocab.h +1 -0
- data/ext/sources/examples/talk-llama/llama.cpp +11 -7
- data/ext/sources/examples/talk-llama/llama.h +147 -40
- data/ext/sources/examples/talk-llama/talk-llama.cpp +2 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +5 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +2 -0
- data/ext/sources/ggml/CMakeLists.txt +48 -3
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml.h +144 -5
- data/ext/sources/ggml/src/CMakeLists.txt +82 -24
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +5 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +46 -23
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +6 -1
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- data/ext/sources/ggml/src/ggml-common.h +4 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +133 -40
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +4 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +146 -105
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1057 -174
- data/ext/sources/ggml/src/ggml-cpu/ops.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +330 -38
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +111 -18
- data/ext/sources/ggml/src/ggml-cpu/vec.h +303 -94
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +60 -37
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +22 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +2 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +265 -123
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +257 -87
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +2 -3
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +5 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-impl.h +127 -183
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +27 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +331 -49
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +564 -282
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1859 -489
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/sources/ggml/src/ggml-quants.c +6 -8
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +28 -41
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +4 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +117 -165
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +192 -53
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +648 -1039
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +8 -105
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -100
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +60 -80
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +55 -74
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -10
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +138 -27
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +12 -16
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +726 -282
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +10 -1
- data/ext/sources/ggml/src/ggml.c +328 -48
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +24 -3
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +218 -169
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +56 -1
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +68 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +91 -43
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2739
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_params.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -1818,7 +1818,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
1818
1818
|
dpct::has_capability_or_fail(stream->get_device(),
|
1819
1819
|
{sycl::aspect::fp16});
|
1820
1820
|
|
1821
|
-
stream
|
1821
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
1822
1822
|
sycl::local_accessor<int, 1> tile_x_qs_q4_0_acc_ct1(
|
1823
1823
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
1824
1824
|
sycl::local_accessor<float, 1> tile_x_d_q4_0_acc_ct1(
|
@@ -1829,9 +1829,8 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
1829
1829
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
1830
1830
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
1831
1831
|
|
1832
|
-
|
1833
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
1834
|
-
[=](sycl::nd_item<3> item_ct1) {
|
1832
|
+
sycl_parallel_for(
|
1833
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
1835
1834
|
mul_mat_q4_0<need_check>(
|
1836
1835
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
1837
1836
|
nrows_dst, item_ct1,
|
@@ -1853,7 +1852,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
1853
1852
|
dpct::has_capability_or_fail(stream->get_device(),
|
1854
1853
|
{sycl::aspect::fp16});
|
1855
1854
|
|
1856
|
-
stream
|
1855
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
1857
1856
|
sycl::local_accessor<int, 1> tile_x_qs_q4_0_acc_ct1(
|
1858
1857
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
1859
1858
|
sycl::local_accessor<float, 1> tile_x_d_q4_0_acc_ct1(
|
@@ -1864,9 +1863,8 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
1864
1863
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
1865
1864
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
1866
1865
|
|
1867
|
-
|
1868
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
1869
|
-
[=](sycl::nd_item<3> item_ct1) {
|
1866
|
+
sycl_parallel_for(
|
1867
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
1870
1868
|
mul_mat_q4_0<need_check>(
|
1871
1869
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
1872
1870
|
nrows_dst, item_ct1,
|
@@ -1933,7 +1931,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
1933
1931
|
dpct::has_capability_or_fail(stream->get_device(),
|
1934
1932
|
{sycl::aspect::fp16});
|
1935
1933
|
|
1936
|
-
stream
|
1934
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
1937
1935
|
sycl::local_accessor<int, 1> tile_x_qs_q4_1_acc_ct1(
|
1938
1936
|
sycl::range<1>(mmq_y * (WARP_SIZE) + +mmq_y), cgh);
|
1939
1937
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q4_1_acc_ct1(
|
@@ -1944,9 +1942,8 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
1944
1942
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
1945
1943
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
1946
1944
|
|
1947
|
-
|
1948
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
1949
|
-
[=](sycl::nd_item<3> item_ct1) {
|
1945
|
+
sycl_parallel_for(
|
1946
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
1950
1947
|
mul_mat_q4_1<need_check>(
|
1951
1948
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
1952
1949
|
nrows_dst, item_ct1,
|
@@ -1968,7 +1965,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
1968
1965
|
dpct::has_capability_or_fail(stream->get_device(),
|
1969
1966
|
{sycl::aspect::fp16});
|
1970
1967
|
|
1971
|
-
stream
|
1968
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
1972
1969
|
sycl::local_accessor<int, 1> tile_x_qs_q4_1_acc_ct1(
|
1973
1970
|
sycl::range<1>(mmq_y * (WARP_SIZE) + +mmq_y), cgh);
|
1974
1971
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q4_1_acc_ct1(
|
@@ -1979,9 +1976,8 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
1979
1976
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
1980
1977
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
1981
1978
|
|
1982
|
-
|
1983
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
1984
|
-
[=](sycl::nd_item<3> item_ct1) {
|
1979
|
+
sycl_parallel_for(
|
1980
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
1985
1981
|
mul_mat_q4_1<need_check>(
|
1986
1982
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
1987
1983
|
nrows_dst, item_ct1,
|
@@ -2048,7 +2044,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2048
2044
|
dpct::has_capability_or_fail(stream->get_device(),
|
2049
2045
|
{sycl::aspect::fp16});
|
2050
2046
|
|
2051
|
-
stream
|
2047
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2052
2048
|
sycl::local_accessor<int, 1> tile_x_ql_q5_0_acc_ct1(
|
2053
2049
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2054
2050
|
sycl::local_accessor<float, 1> tile_x_d_q5_0_acc_ct1(
|
@@ -2059,9 +2055,8 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2059
2055
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2060
2056
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2061
2057
|
|
2062
|
-
|
2063
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2064
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2058
|
+
sycl_parallel_for(
|
2059
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2065
2060
|
mul_mat_q5_0<need_check>(
|
2066
2061
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2067
2062
|
nrows_dst, item_ct1,
|
@@ -2083,7 +2078,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2083
2078
|
dpct::has_capability_or_fail(stream->get_device(),
|
2084
2079
|
{sycl::aspect::fp16});
|
2085
2080
|
|
2086
|
-
stream
|
2081
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2087
2082
|
sycl::local_accessor<int, 1> tile_x_ql_q5_0_acc_ct1(
|
2088
2083
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2089
2084
|
sycl::local_accessor<float, 1> tile_x_d_q5_0_acc_ct1(
|
@@ -2094,9 +2089,8 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2094
2089
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2095
2090
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2096
2091
|
|
2097
|
-
|
2098
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2099
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2092
|
+
sycl_parallel_for(
|
2093
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2100
2094
|
mul_mat_q5_0<need_check>(
|
2101
2095
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2102
2096
|
nrows_dst, item_ct1,
|
@@ -2163,7 +2157,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
2163
2157
|
dpct::has_capability_or_fail(stream->get_device(),
|
2164
2158
|
{sycl::aspect::fp16});
|
2165
2159
|
|
2166
|
-
stream
|
2160
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2167
2161
|
sycl::local_accessor<int, 1> tile_x_ql_q5_1_acc_ct1(
|
2168
2162
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2169
2163
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q5_1_acc_ct1(
|
@@ -2174,9 +2168,8 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
2174
2168
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2175
2169
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2176
2170
|
|
2177
|
-
|
2178
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2179
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2171
|
+
sycl_parallel_for(
|
2172
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2180
2173
|
mul_mat_q5_1<need_check>(
|
2181
2174
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2182
2175
|
nrows_dst, item_ct1,
|
@@ -2198,7 +2191,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
2198
2191
|
dpct::has_capability_or_fail(stream->get_device(),
|
2199
2192
|
{sycl::aspect::fp16});
|
2200
2193
|
|
2201
|
-
stream
|
2194
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2202
2195
|
sycl::local_accessor<int, 1> tile_x_ql_q5_1_acc_ct1(
|
2203
2196
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2204
2197
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q5_1_acc_ct1(
|
@@ -2209,9 +2202,8 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
2209
2202
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2210
2203
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2211
2204
|
|
2212
|
-
|
2213
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2214
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2205
|
+
sycl_parallel_for(
|
2206
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2215
2207
|
mul_mat_q5_1<need_check>(
|
2216
2208
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2217
2209
|
nrows_dst, item_ct1,
|
@@ -2278,7 +2270,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2278
2270
|
dpct::has_capability_or_fail(stream->get_device(),
|
2279
2271
|
{sycl::aspect::fp16});
|
2280
2272
|
|
2281
|
-
stream
|
2273
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2282
2274
|
sycl::local_accessor<int, 1> tile_x_qs_q8_0_acc_ct1(
|
2283
2275
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2284
2276
|
sycl::local_accessor<float, 1> tile_x_d_q8_0_acc_ct1(
|
@@ -2289,9 +2281,8 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2289
2281
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2290
2282
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2291
2283
|
|
2292
|
-
|
2293
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2294
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2284
|
+
sycl_parallel_for(
|
2285
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2295
2286
|
mul_mat_q8_0<need_check>(
|
2296
2287
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2297
2288
|
nrows_dst, item_ct1,
|
@@ -2313,7 +2304,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2313
2304
|
dpct::has_capability_or_fail(stream->get_device(),
|
2314
2305
|
{sycl::aspect::fp16});
|
2315
2306
|
|
2316
|
-
stream
|
2307
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2317
2308
|
sycl::local_accessor<int, 1> tile_x_qs_q8_0_acc_ct1(
|
2318
2309
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2319
2310
|
sycl::local_accessor<float, 1> tile_x_d_q8_0_acc_ct1(
|
@@ -2324,9 +2315,8 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
2324
2315
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2325
2316
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2326
2317
|
|
2327
|
-
|
2328
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2329
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2318
|
+
sycl_parallel_for(
|
2319
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2330
2320
|
mul_mat_q8_0<need_check>(
|
2331
2321
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2332
2322
|
nrows_dst, item_ct1,
|
@@ -2393,7 +2383,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2393
2383
|
dpct::has_capability_or_fail(stream->get_device(),
|
2394
2384
|
{sycl::aspect::fp16});
|
2395
2385
|
|
2396
|
-
stream
|
2386
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2397
2387
|
sycl::local_accessor<int, 1> tile_x_ql_q2_K_acc_ct1(
|
2398
2388
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2399
2389
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q2_K_acc_ct1(
|
@@ -2406,9 +2396,8 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2406
2396
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2407
2397
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2408
2398
|
|
2409
|
-
|
2410
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2411
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2399
|
+
sycl_parallel_for(
|
2400
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2412
2401
|
mul_mat_q2_K<need_check>(
|
2413
2402
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2414
2403
|
nrows_dst, item_ct1,
|
@@ -2431,7 +2420,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2431
2420
|
dpct::has_capability_or_fail(stream->get_device(),
|
2432
2421
|
{sycl::aspect::fp16});
|
2433
2422
|
|
2434
|
-
stream
|
2423
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2435
2424
|
sycl::local_accessor<int, 1> tile_x_ql_q2_K_acc_ct1(
|
2436
2425
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2437
2426
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q2_K_acc_ct1(
|
@@ -2444,9 +2433,8 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2444
2433
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2445
2434
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2446
2435
|
|
2447
|
-
|
2448
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2449
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2436
|
+
sycl_parallel_for(
|
2437
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2450
2438
|
mul_mat_q2_K<need_check>(
|
2451
2439
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2452
2440
|
nrows_dst, item_ct1,
|
@@ -2516,7 +2504,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2516
2504
|
dpct::has_capability_or_fail(stream->get_device(),
|
2517
2505
|
{sycl::aspect::fp16});
|
2518
2506
|
|
2519
|
-
stream
|
2507
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2520
2508
|
sycl::local_accessor<int, 1> tile_x_ql_q3_K_acc_ct1(
|
2521
2509
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2522
2510
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q3_K_acc_ct1(
|
@@ -2531,9 +2519,8 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2531
2519
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2532
2520
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2533
2521
|
|
2534
|
-
|
2535
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2536
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2522
|
+
sycl_parallel_for(
|
2523
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2537
2524
|
mul_mat_q3_K<need_check>(
|
2538
2525
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2539
2526
|
nrows_dst, item_ct1,
|
@@ -2557,7 +2544,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2557
2544
|
dpct::has_capability_or_fail(stream->get_device(),
|
2558
2545
|
{sycl::aspect::fp16});
|
2559
2546
|
|
2560
|
-
stream
|
2547
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2561
2548
|
sycl::local_accessor<int, 1> tile_x_ql_q3_K_acc_ct1(
|
2562
2549
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2563
2550
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q3_K_acc_ct1(
|
@@ -2572,9 +2559,8 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2572
2559
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2573
2560
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2574
2561
|
|
2575
|
-
|
2576
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2577
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2562
|
+
sycl_parallel_for(
|
2563
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2578
2564
|
mul_mat_q3_K<need_check>(
|
2579
2565
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2580
2566
|
nrows_dst, item_ct1,
|
@@ -2644,7 +2630,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2644
2630
|
dpct::has_capability_or_fail(stream->get_device(),
|
2645
2631
|
{sycl::aspect::fp16});
|
2646
2632
|
|
2647
|
-
stream
|
2633
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2648
2634
|
sycl::local_accessor<int, 1> tile_x_ql_q4_K_acc_ct1(
|
2649
2635
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2650
2636
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q4_K_acc_ct1(
|
@@ -2657,9 +2643,8 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2657
2643
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2658
2644
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2659
2645
|
|
2660
|
-
|
2661
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2662
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2646
|
+
sycl_parallel_for(
|
2647
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2663
2648
|
mul_mat_q4_K<need_check>(
|
2664
2649
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2665
2650
|
nrows_dst, item_ct1,
|
@@ -2682,7 +2667,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2682
2667
|
dpct::has_capability_or_fail(stream->get_device(),
|
2683
2668
|
{sycl::aspect::fp16});
|
2684
2669
|
|
2685
|
-
stream
|
2670
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2686
2671
|
sycl::local_accessor<int, 1> tile_x_ql_q4_K_acc_ct1(
|
2687
2672
|
sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh);
|
2688
2673
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q4_K_acc_ct1(
|
@@ -2695,9 +2680,8 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2695
2680
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2696
2681
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2697
2682
|
|
2698
|
-
|
2699
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2700
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2683
|
+
sycl_parallel_for(
|
2684
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2701
2685
|
mul_mat_q4_K<need_check>(
|
2702
2686
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2703
2687
|
nrows_dst, item_ct1,
|
@@ -2765,7 +2749,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2765
2749
|
dpct::has_capability_or_fail(stream->get_device(),
|
2766
2750
|
{sycl::aspect::fp16});
|
2767
2751
|
|
2768
|
-
stream
|
2752
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2769
2753
|
sycl::local_accessor<int, 1> tile_x_ql_q5_K_acc_ct1(
|
2770
2754
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2771
2755
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q5_K_acc_ct1(
|
@@ -2778,9 +2762,8 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2778
2762
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2779
2763
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2780
2764
|
|
2781
|
-
|
2782
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2783
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2765
|
+
sycl_parallel_for(
|
2766
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2784
2767
|
mul_mat_q5_K<need_check>(
|
2785
2768
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2786
2769
|
nrows_dst, item_ct1,
|
@@ -2803,7 +2786,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2803
2786
|
dpct::has_capability_or_fail(stream->get_device(),
|
2804
2787
|
{sycl::aspect::fp16});
|
2805
2788
|
|
2806
|
-
stream
|
2789
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2807
2790
|
sycl::local_accessor<int, 1> tile_x_ql_q5_K_acc_ct1(
|
2808
2791
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2809
2792
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_q5_K_acc_ct1(
|
@@ -2816,9 +2799,8 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2816
2799
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2817
2800
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2818
2801
|
|
2819
|
-
|
2820
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2821
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2802
|
+
sycl_parallel_for(
|
2803
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2822
2804
|
mul_mat_q5_K<need_check>(
|
2823
2805
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2824
2806
|
nrows_dst, item_ct1,
|
@@ -2886,7 +2868,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2886
2868
|
dpct::has_capability_or_fail(stream->get_device(),
|
2887
2869
|
{sycl::aspect::fp16});
|
2888
2870
|
|
2889
|
-
stream
|
2871
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2890
2872
|
sycl::local_accessor<int, 1> tile_x_ql_acc_ct1(
|
2891
2873
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2892
2874
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_acc_ct1(
|
@@ -2899,9 +2881,8 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2899
2881
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2900
2882
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2901
2883
|
|
2902
|
-
|
2903
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2904
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2884
|
+
sycl_parallel_for(
|
2885
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2905
2886
|
mul_mat_q6_K<need_check>(
|
2906
2887
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2907
2888
|
nrows_dst, item_ct1,
|
@@ -2924,7 +2905,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2924
2905
|
dpct::has_capability_or_fail(stream->get_device(),
|
2925
2906
|
{sycl::aspect::fp16});
|
2926
2907
|
|
2927
|
-
stream
|
2908
|
+
sycl_launch(stream, [&](sycl::handler & cgh) {
|
2928
2909
|
sycl::local_accessor<int, 1> tile_x_ql_acc_ct1(
|
2929
2910
|
sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh);
|
2930
2911
|
sycl::local_accessor<sycl::half2, 1> tile_x_dm_acc_ct1(
|
@@ -2937,9 +2918,8 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
2937
2918
|
sycl::local_accessor<sycl::half2, 1> tile_y_ds_acc_ct1(
|
2938
2919
|
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh);
|
2939
2920
|
|
2940
|
-
|
2941
|
-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
2942
|
-
[=](sycl::nd_item<3> item_ct1) {
|
2921
|
+
sycl_parallel_for(
|
2922
|
+
cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
|
2943
2923
|
mul_mat_q6_K<need_check>(
|
2944
2924
|
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
2945
2925
|
nrows_dst, item_ct1,
|