@novastera-oss/llamarn 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/android/src/main/cpp/include/llama.h +8 -3
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  10. package/cpp/LlamaCppModel.cpp +56 -22
  11. package/cpp/build-info.cpp +2 -2
  12. package/cpp/llama.cpp/CMakeLists.txt +1 -1
  13. package/cpp/llama.cpp/common/arg.cpp +7 -0
  14. package/cpp/llama.cpp/common/common.cpp +3 -0
  15. package/cpp/llama.cpp/common/common.h +1 -0
  16. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  17. package/cpp/llama.cpp/convert_hf_to_gguf.py +118 -20
  18. package/cpp/llama.cpp/ggml/CMakeLists.txt +1 -0
  19. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  20. package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
  21. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -0
  22. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
  23. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -2
  24. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
  25. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
  26. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
  27. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
  28. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  29. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
  30. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
  31. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
  32. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
  33. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
  34. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
  35. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
  36. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
  37. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
  38. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +83 -102
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +192 -67
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +211 -33
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
  48. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +54 -29
  49. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  54. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +84 -31
  55. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
  62. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +227 -41
  64. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +362 -182
  65. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  66. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +240 -535
  67. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  68. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
  69. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  70. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  71. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
  72. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
  73. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  74. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  75. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
  76. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
  77. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +45 -54
  78. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  79. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  80. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  81. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
  82. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  83. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
  84. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  85. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  89. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
  90. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +57 -1
  91. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
  92. package/cpp/llama.cpp/ggml/src/ggml.c +69 -13
  93. package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
  94. package/cpp/llama.cpp/gguf-py/gguf/constants.py +76 -0
  95. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +21 -0
  96. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +64 -0
  97. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
  98. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  99. package/cpp/llama.cpp/include/llama.h +8 -3
  100. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  101. package/cpp/llama.cpp/src/llama-arch.cpp +55 -0
  102. package/cpp/llama.cpp/src/llama-arch.h +18 -0
  103. package/cpp/llama.cpp/src/llama-batch.cpp +570 -359
  104. package/cpp/llama.cpp/src/llama-batch.h +98 -70
  105. package/cpp/llama.cpp/src/llama-chat.cpp +11 -6
  106. package/cpp/llama.cpp/src/llama-context.cpp +101 -107
  107. package/cpp/llama.cpp/src/llama-context.h +13 -13
  108. package/cpp/llama.cpp/src/llama-graph.cpp +199 -252
  109. package/cpp/llama.cpp/src/llama-graph.h +44 -32
  110. package/cpp/llama.cpp/src/llama-hparams.cpp +4 -0
  111. package/cpp/llama.cpp/src/llama-hparams.h +8 -0
  112. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +51 -53
  113. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +19 -24
  114. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +110 -104
  115. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +17 -22
  116. package/cpp/llama.cpp/src/llama-kv-cells.h +35 -11
  117. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +66 -67
  118. package/cpp/llama.cpp/src/llama-memory-hybrid.h +16 -21
  119. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +69 -68
  120. package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
  121. package/cpp/llama.cpp/src/llama-memory.h +18 -22
  122. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  123. package/cpp/llama.cpp/src/llama-model.cpp +1006 -472
  124. package/cpp/llama.cpp/src/llama-model.h +22 -0
  125. package/cpp/llama.cpp/src/llama-quant.cpp +87 -5
  126. package/cpp/llama.cpp/src/llama-vocab.cpp +26 -3
  127. package/cpp/llama.cpp/src/llama-vocab.h +1 -0
  128. package/cpp/rn-utils.h +3 -0
  129. package/ios/include/common.h +1 -0
  130. package/ios/include/llama.h +8 -3
  131. package/ios/libs/llama.xcframework/Info.plist +19 -19
  132. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  133. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
  134. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  135. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
  136. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -3
  137. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  138. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  139. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
  140. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
  141. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  142. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  143. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
  144. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  145. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  146. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
  147. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3744
  148. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  149. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
  150. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -3
  151. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  152. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
  153. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -3
  154. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  155. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  156. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
  157. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -3
  158. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  159. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  160. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  161. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
  162. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  163. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
  164. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -3
  165. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  166. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  167. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
  168. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
  169. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  170. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  171. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
  172. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  173. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  174. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4900
  175. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  176. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
  177. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -3
  178. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  179. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  180. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4871
  181. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3773
  182. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  183. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  184. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
  185. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  186. package/package.json +1 -1
@@ -544,12 +544,12 @@ static void reorder_mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy,
544
544
  const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, (block_num_y * WARP_SIZE));
545
545
  const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
546
546
 
547
- stream->submit([&](sycl::handler & cgh) {
548
- cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
549
- [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
550
- mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0>>(vx, vy, dst, ncols, nrows,
551
- nd_item);
552
- });
547
+ sycl_launch(stream, [&](sycl::handler & cgh) {
548
+ sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size),
549
+ [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
550
+ mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0>>(vx, vy, dst, ncols, nrows,
551
+ nd_item);
552
+ });
553
553
  });
554
554
  }
555
555
 
@@ -561,12 +561,12 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy, float *
561
561
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
562
562
 
563
563
  {
564
- stream->submit([&](sycl::handler & cgh) {
565
- cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
566
- [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
567
- mul_mat_vec_q<QK4_0, QI4_0, block_q4_0, VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
568
- vx, vy, dst, ncols, nrows, item_ct1);
569
- });
564
+ sycl_launch(stream, [&](sycl::handler & cgh) {
565
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
566
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
567
+ mul_mat_vec_q<QK4_0, QI4_0, block_q4_0, VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
568
+ vx, vy, dst, ncols, nrows, item_ct1);
569
+ });
570
570
  });
571
571
  }
572
572
  }
@@ -580,17 +580,12 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
580
580
  const sycl::range<3> block_nums(1, 1, block_num_y);
581
581
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
582
582
  {
583
-
584
- stream->submit([&](sycl::handler &cgh) {
585
-
586
- cgh.parallel_for(
587
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
588
- [=](sycl::nd_item<3> item_ct1)
589
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
590
- mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
591
- VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
592
- vx, vy, dst, ncols, nrows, item_ct1);
593
- });
583
+ sycl_launch(stream, [&](sycl::handler & cgh) {
584
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
585
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
586
+ mul_mat_vec_q<QK4_0, QI4_1, block_q4_1, VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
587
+ vx, vy, dst, ncols, nrows, item_ct1);
588
+ });
594
589
  });
595
590
  }
596
591
  }
@@ -604,17 +599,12 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
604
599
  const sycl::range<3> block_nums(1, 1, block_num_y);
605
600
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
606
601
  {
607
-
608
- stream->submit([&](sycl::handler &cgh) {
609
-
610
- cgh.parallel_for(
611
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
612
- [=](sycl::nd_item<3> item_ct1)
613
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
614
- mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
615
- VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
616
- vx, vy, dst, ncols, nrows, item_ct1);
617
- });
602
+ sycl_launch(stream, [&](sycl::handler & cgh) {
603
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
604
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
605
+ mul_mat_vec_q<QK5_0, QI5_0, block_q5_0, VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
606
+ vx, vy, dst, ncols, nrows, item_ct1);
607
+ });
618
608
  });
619
609
  }
620
610
  }
@@ -628,17 +618,12 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
628
618
  const sycl::range<3> block_nums(1, 1, block_num_y);
629
619
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
630
620
  {
631
-
632
- stream->submit([&](sycl::handler &cgh) {
633
-
634
- cgh.parallel_for(
635
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
636
- [=](sycl::nd_item<3> item_ct1)
637
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
638
- mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
639
- VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
640
- vx, vy, dst, ncols, nrows, item_ct1);
641
- });
621
+ sycl_launch(stream, [&](sycl::handler & cgh) {
622
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
623
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
624
+ mul_mat_vec_q<QK5_1, QI5_1, block_q5_1, VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
625
+ vx, vy, dst, ncols, nrows, item_ct1);
626
+ });
642
627
  });
643
628
  }
644
629
  }
@@ -652,17 +637,12 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
652
637
  const sycl::range<3> block_nums(1, 1, block_num_y);
653
638
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
654
639
  {
655
-
656
- stream->submit([&](sycl::handler &cgh) {
657
-
658
- cgh.parallel_for(
659
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
660
- [=](sycl::nd_item<3> item_ct1)
661
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
662
- mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
663
- VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
664
- vx, vy, dst, ncols, nrows, item_ct1);
665
- });
640
+ sycl_launch(stream, [&](sycl::handler & cgh) {
641
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
642
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
643
+ mul_mat_vec_q<QK8_0, QI8_0, block_q8_0, VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
644
+ vx, vy, dst, ncols, nrows, item_ct1);
645
+ });
666
646
  });
667
647
  }
668
648
  }
@@ -676,17 +656,12 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
676
656
  const sycl::range<3> block_nums(1, 1, block_num_y);
677
657
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
678
658
  {
679
-
680
- stream->submit([&](sycl::handler &cgh) {
681
-
682
- cgh.parallel_for(
683
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
684
- [=](sycl::nd_item<3> item_ct1)
685
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
686
- mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
687
- VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
688
- vx, vy, dst, ncols, nrows, item_ct1);
689
- });
659
+ sycl_launch(stream, [&](sycl::handler & cgh) {
660
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
661
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
662
+ mul_mat_vec_q<QK_K, QI2_K, block_q2_K, VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
663
+ vx, vy, dst, ncols, nrows, item_ct1);
664
+ });
690
665
  });
691
666
  }
692
667
  }
@@ -700,17 +675,12 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
700
675
  const sycl::range<3> block_nums(1, 1, block_num_y);
701
676
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
702
677
  {
703
-
704
- stream->submit([&](sycl::handler &cgh) {
705
-
706
- cgh.parallel_for(
707
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
708
- [=](sycl::nd_item<3> item_ct1)
709
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
710
- mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
711
- VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
712
- vx, vy, dst, ncols, nrows, item_ct1);
713
- });
678
+ sycl_launch(stream, [&](sycl::handler & cgh) {
679
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
680
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
681
+ mul_mat_vec_q<QK_K, QI3_K, block_q3_K, VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
682
+ vx, vy, dst, ncols, nrows, item_ct1);
683
+ });
714
684
  });
715
685
  }
716
686
  }
@@ -724,17 +694,12 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
724
694
  const sycl::range<3> block_nums(1, 1, block_num_y);
725
695
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
726
696
  {
727
-
728
- stream->submit([&](sycl::handler &cgh) {
729
-
730
- cgh.parallel_for(
731
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
732
- [=](sycl::nd_item<3> item_ct1)
733
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
734
- mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
735
- VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
736
- vx, vy, dst, ncols, nrows, item_ct1);
737
- });
697
+ sycl_launch(stream, [&](sycl::handler & cgh) {
698
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
699
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
700
+ mul_mat_vec_q<QK_K, QI4_K, block_q4_K, VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
701
+ vx, vy, dst, ncols, nrows, item_ct1);
702
+ });
738
703
  });
739
704
  }
740
705
  }
@@ -750,12 +715,12 @@ static void reorder_mul_mat_vec_q4_k_q8_1_sycl(const void * vx, const void * vy,
750
715
  const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
751
716
  const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
752
717
 
753
- stream->submit([&](sycl::handler & cgh) {
754
- cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
755
- [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
756
- mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K>>(vx, vy, dst, ncols,
757
- nrows, nd_item);
758
- });
718
+ sycl_launch(stream, [&](sycl::handler & cgh) {
719
+ sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size),
720
+ [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
721
+ mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K>>(vx, vy, dst, ncols, nrows,
722
+ nd_item);
723
+ });
759
724
  });
760
725
  }
761
726
 
@@ -769,17 +734,12 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
769
734
  const sycl::range<3> block_nums(1, 1, block_num_y);
770
735
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
771
736
  {
772
-
773
- stream->submit([&](sycl::handler &cgh) {
774
-
775
- cgh.parallel_for(
776
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
777
- [=](sycl::nd_item<3> item_ct1)
778
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
779
- mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
780
- VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
781
- vx, vy, dst, ncols, nrows, item_ct1);
782
- });
737
+ sycl_launch(stream, [&](sycl::handler & cgh) {
738
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
739
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
740
+ mul_mat_vec_q<QK_K, QI5_K, block_q5_K, VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
741
+ vx, vy, dst, ncols, nrows, item_ct1);
742
+ });
783
743
  });
784
744
  }
785
745
  }
@@ -794,12 +754,12 @@ static void reorder_mul_mat_vec_q6_k_q8_1_sycl(const void * vx, const void * vy,
794
754
  const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
795
755
  const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
796
756
 
797
- stream->submit([&](sycl::handler & cgh) {
798
- cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
799
- [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
800
- mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K>>(vx, vy, dst, ncols, nrows,
801
- nd_item);
802
- });
757
+ sycl_launch(stream, [&](sycl::handler & cgh) {
758
+ sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size),
759
+ [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
760
+ mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K>>(vx, vy, dst, ncols, nrows,
761
+ nd_item);
762
+ });
803
763
  });
804
764
  }
805
765
  static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
@@ -811,17 +771,12 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
811
771
  const sycl::range<3> block_nums(1, 1, block_num_y);
812
772
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
813
773
  {
814
-
815
- stream->submit([&](sycl::handler &cgh) {
816
-
817
- cgh.parallel_for(
818
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
819
- [=](sycl::nd_item<3> item_ct1)
820
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
821
- mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
822
- VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
823
- vx, vy, dst, ncols, nrows, item_ct1);
824
- });
774
+ sycl_launch(stream, [&](sycl::handler & cgh) {
775
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
776
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
777
+ mul_mat_vec_q<QK_K, QI6_K, block_q6_K, VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
778
+ vx, vy, dst, ncols, nrows, item_ct1);
779
+ });
825
780
  });
826
781
  }
827
782
  }
@@ -836,14 +791,12 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
836
791
  const sycl::range<3> block_nums(1, 1, block_num_y);
837
792
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
838
793
  {
839
- stream->submit([&](sycl::handler &cgh) {
840
- cgh.parallel_for(
841
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
842
- [=](sycl::nd_item<3> item_ct1)
843
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
844
- mul_mat_vec_q_iq2_xxs_q8_1<QK_K, QI2_XXS/2, block_iq2_xxs, 1>(
845
- vx, vy, dst, ncols, nrows, item_ct1);
846
- });
794
+ sycl_launch(stream, [&](sycl::handler & cgh) {
795
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
796
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
797
+ mul_mat_vec_q_iq2_xxs_q8_1<QK_K, QI2_XXS / 2, block_iq2_xxs, 1>(vx, vy, dst, ncols,
798
+ nrows, item_ct1);
799
+ });
847
800
  });
848
801
  }
849
802
  }
@@ -857,14 +810,12 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
857
810
  const sycl::range<3> block_nums(1, 1, block_num_y);
858
811
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
859
812
  {
860
- stream->submit([&](sycl::handler & cgh) {
861
- cgh.parallel_for(
862
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
863
- [=](sycl::nd_item<3> item_ct1)
864
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
865
- mul_mat_vec_q_iq2_xs_q8_1<QK_K, QI2_XS/2, block_iq2_xs, 1>(
866
- vx, vy, dst, ncols, nrows, item_ct1);
867
- });
813
+ sycl_launch(stream, [&](sycl::handler & cgh) {
814
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
815
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
816
+ mul_mat_vec_q_iq2_xs_q8_1<QK_K, QI2_XS / 2, block_iq2_xs, 1>(vx, vy, dst, ncols,
817
+ nrows, item_ct1);
818
+ });
868
819
  });
869
820
  }
870
821
  }
@@ -878,15 +829,12 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy,
878
829
  const sycl::range<3> block_nums(1, 1, block_num_y);
879
830
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
880
831
  {
881
-
882
- stream->submit([&](sycl::handler &cgh) {
883
- cgh.parallel_for(
884
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
885
- [=](sycl::nd_item<3> item_ct1)
886
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
887
- mul_mat_vec_q_iq2_s_q8_1<QK_K, QI2_S/2, block_iq2_s, 1>(
888
- vx, vy, dst, ncols, nrows, item_ct1);
889
- });
832
+ sycl_launch(stream, [&](sycl::handler & cgh) {
833
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
834
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
835
+ mul_mat_vec_q_iq2_s_q8_1<QK_K, QI2_S / 2, block_iq2_s, 1>(vx, vy, dst, ncols, nrows,
836
+ item_ct1);
837
+ });
890
838
  });
891
839
  }
892
840
  }
@@ -900,15 +848,12 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
900
848
  const sycl::range<3> block_nums(1, 1, block_num_y);
901
849
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
902
850
  {
903
-
904
- stream->submit([&](sycl::handler &cgh) {
905
- cgh.parallel_for(
906
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
907
- [=](sycl::nd_item<3> item_ct1)
908
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
909
- mul_mat_vec_q_iq3_xxs_q8_1<QK_K, QI3_XXS/2, block_iq3_xxs, 1>(
910
- vx, vy, dst, ncols, nrows, item_ct1);
911
- });
851
+ sycl_launch(stream, [&](sycl::handler & cgh) {
852
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
853
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
854
+ mul_mat_vec_q_iq3_xxs_q8_1<QK_K, QI3_XXS / 2, block_iq3_xxs, 1>(vx, vy, dst, ncols,
855
+ nrows, item_ct1);
856
+ });
912
857
  });
913
858
  }
914
859
  }
@@ -922,15 +867,12 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
922
867
  const sycl::range<3> block_nums(1, 1, block_num_y);
923
868
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
924
869
  {
925
-
926
- stream->submit([&](sycl::handler &cgh) {
927
- cgh.parallel_for(
928
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
929
- [=](sycl::nd_item<3> item_ct1)
930
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
931
- mul_mat_vec_q_iq3_s_q8_1<QK_K, QI3_S/2, block_iq3_s, 1>(
932
- vx, vy, dst, ncols, nrows, item_ct1);
933
- });
870
+ sycl_launch(stream, [&](sycl::handler & cgh) {
871
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
872
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
873
+ mul_mat_vec_q_iq3_s_q8_1<QK_K, QI3_S / 2, block_iq3_s, 1>(vx, vy, dst, ncols, nrows,
874
+ item_ct1);
875
+ });
934
876
  });
935
877
  }
936
878
  }
@@ -944,15 +886,12 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
944
886
  const sycl::range<3> block_nums(1, 1, block_num_y);
945
887
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
946
888
  {
947
-
948
- stream->submit([&](sycl::handler &cgh) {
949
- cgh.parallel_for(
950
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
951
- [=](sycl::nd_item<3> item_ct1)
952
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
953
- mul_mat_vec_q_iq1_s_q8_1<QK_K, QI1_S, block_iq1_s, 1>(
954
- vx, vy, dst, ncols, nrows, item_ct1);
955
- });
889
+ sycl_launch(stream, [&](sycl::handler & cgh) {
890
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
891
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
892
+ mul_mat_vec_q_iq1_s_q8_1<QK_K, QI1_S, block_iq1_s, 1>(vx, vy, dst, ncols, nrows,
893
+ item_ct1);
894
+ });
956
895
  });
957
896
  }
958
897
  }
@@ -966,14 +905,12 @@ static void mul_mat_vec_iq1_m_q8_1_sycl(const void *vx, const void *vy,
966
905
  const sycl::range<3> block_nums(1, 1, block_num_y);
967
906
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
968
907
  {
969
- stream->submit([&](sycl::handler &cgh) {
970
- cgh.parallel_for(
971
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
972
- [=](sycl::nd_item<3> item_ct1)
973
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
974
- mul_mat_vec_q_iq1_m_q8_1<QK_K, QI1_S, block_iq1_m, 1>(
975
- vx, vy, dst, ncols, nrows, item_ct1);
976
- });
908
+ sycl_launch(stream, [&](sycl::handler & cgh) {
909
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
910
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
911
+ mul_mat_vec_q_iq1_m_q8_1<QK_K, QI1_S, block_iq1_m, 1>(vx, vy, dst, ncols, nrows,
912
+ item_ct1);
913
+ });
977
914
  });
978
915
  }
979
916
  }
@@ -987,15 +924,12 @@ static void mul_mat_vec_iq4_nl_q8_1_sycl(const void *vx, const void *vy,
987
924
  const sycl::range<3> block_nums(1, 1, block_num_y);
988
925
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
989
926
  {
990
-
991
- stream->submit([&](sycl::handler &cgh) {
992
- cgh.parallel_for(
993
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
994
- [=](sycl::nd_item<3> item_ct1)
995
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
996
- mul_mat_vec_q_iq4_nl_q8_1<QK4_NL, QI4_NL, block_iq4_nl, 2>(
997
- vx, vy, dst, ncols, nrows, item_ct1);
998
- });
927
+ sycl_launch(stream, [&](sycl::handler & cgh) {
928
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
929
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
930
+ mul_mat_vec_q_iq4_nl_q8_1<QK4_NL, QI4_NL, block_iq4_nl, 2>(vx, vy, dst, ncols, nrows,
931
+ item_ct1);
932
+ });
999
933
  });
1000
934
  }
1001
935
  }
@@ -1009,15 +943,12 @@ static void mul_mat_vec_iq4_xs_q8_1_sycl(const void *vx, const void *vy,
1009
943
  const sycl::range<3> block_nums(1, 1, block_num_y);
1010
944
  const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
1011
945
  {
1012
-
1013
- stream->submit([&](sycl::handler &cgh) {
1014
- cgh.parallel_for(
1015
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
1016
- [=](sycl::nd_item<3> item_ct1)
1017
- [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
1018
- mul_mat_vec_q_iq4_xs_q8_1<QK_K, QI4_XS/4, block_iq4_xs, 1>(
1019
- vx, vy, dst, ncols, nrows, item_ct1);
1020
- });
946
+ sycl_launch(stream, [&](sycl::handler & cgh) {
947
+ sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims),
948
+ [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
949
+ mul_mat_vec_q_iq4_xs_q8_1<QK_K, QI4_XS / 4, block_iq4_xs, 1>(vx, vy, dst, ncols,
950
+ nrows, item_ct1);
951
+ });
1021
952
  });
1022
953
  }
1023
954
  }