@fugood/llama.node 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +17 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +3 -1
  19. package/lib/index.js +16 -1
  20. package/lib/index.ts +16 -0
  21. package/package.json +1 -1
  22. package/src/EmbeddingWorker.cpp +4 -3
  23. package/src/LlamaCompletionWorker.cpp +4 -2
  24. package/src/LlamaContext.cpp +61 -6
  25. package/src/LlamaContext.h +1 -0
  26. package/src/common.hpp +6 -11
  27. package/src/llama.cpp/.github/workflows/build.yml +19 -17
  28. package/src/llama.cpp/.github/workflows/docker.yml +77 -30
  29. package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +22 -3
  31. package/src/llama.cpp/CMakeLists.txt +49 -24
  32. package/src/llama.cpp/common/arg.cpp +82 -26
  33. package/src/llama.cpp/common/arg.h +3 -0
  34. package/src/llama.cpp/common/common.cpp +192 -72
  35. package/src/llama.cpp/common/common.h +51 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +12 -12
  37. package/src/llama.cpp/common/ngram-cache.h +2 -2
  38. package/src/llama.cpp/common/sampling.cpp +11 -6
  39. package/src/llama.cpp/common/speculative.cpp +18 -15
  40. package/src/llama.cpp/docs/build.md +2 -0
  41. package/src/llama.cpp/examples/batched/batched.cpp +9 -7
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
  43. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
  44. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
  45. package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
  46. package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
  47. package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
  48. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
  50. package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
  51. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
  52. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
  53. package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
  54. package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
  55. package/src/llama.cpp/examples/infill/infill.cpp +23 -24
  56. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
  57. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
  58. package/src/llama.cpp/examples/llava/clip.cpp +4 -2
  59. package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
  60. package/src/llama.cpp/examples/llava/llava.cpp +2 -2
  61. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
  62. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
  63. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
  64. package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
  65. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
  66. package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
  67. package/src/llama.cpp/examples/main/main.cpp +51 -29
  68. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
  69. package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
  70. package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
  71. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
  72. package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
  73. package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
  74. package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
  76. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
  77. package/src/llama.cpp/examples/run/run.cpp +175 -61
  78. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
  79. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
  80. package/src/llama.cpp/examples/server/httplib.h +1295 -409
  81. package/src/llama.cpp/examples/server/server.cpp +387 -181
  82. package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
  83. package/src/llama.cpp/examples/server/utils.hpp +170 -58
  84. package/src/llama.cpp/examples/simple/simple.cpp +9 -8
  85. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
  86. package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
  87. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
  88. package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
  89. package/src/llama.cpp/examples/tts/tts.cpp +64 -23
  90. package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
  91. package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
  92. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
  93. package/src/llama.cpp/ggml/include/ggml.h +36 -145
  94. package/src/llama.cpp/ggml/include/gguf.h +202 -0
  95. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  96. package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
  97. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
  98. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
  99. package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
  100. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
  101. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
  102. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
  103. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
  105. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
  106. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
  107. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
  109. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
  111. package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
  112. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
  113. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
  115. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
  117. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
  120. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
  121. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
  124. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
  125. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
  126. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  128. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
  129. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
  130. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
  131. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
  132. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
  133. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
  134. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
  135. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
  138. package/src/llama.cpp/ggml/src/ggml.c +117 -1327
  139. package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
  140. package/src/llama.cpp/include/llama-cpp.h +6 -1
  141. package/src/llama.cpp/include/llama.h +138 -75
  142. package/src/llama.cpp/src/CMakeLists.txt +13 -1
  143. package/src/llama.cpp/src/llama-adapter.cpp +347 -0
  144. package/src/llama.cpp/src/llama-adapter.h +74 -0
  145. package/src/llama.cpp/src/llama-arch.cpp +1487 -0
  146. package/src/llama.cpp/src/llama-arch.h +400 -0
  147. package/src/llama.cpp/src/llama-batch.cpp +368 -0
  148. package/src/llama.cpp/src/llama-batch.h +88 -0
  149. package/src/llama.cpp/src/llama-chat.cpp +578 -0
  150. package/src/llama.cpp/src/llama-chat.h +52 -0
  151. package/src/llama.cpp/src/llama-context.cpp +1775 -0
  152. package/src/llama.cpp/src/llama-context.h +128 -0
  153. package/src/llama.cpp/src/llama-cparams.cpp +1 -0
  154. package/src/llama.cpp/src/llama-cparams.h +37 -0
  155. package/src/llama.cpp/src/llama-grammar.cpp +5 -4
  156. package/src/llama.cpp/src/llama-grammar.h +3 -1
  157. package/src/llama.cpp/src/llama-hparams.cpp +71 -0
  158. package/src/llama.cpp/src/llama-hparams.h +139 -0
  159. package/src/llama.cpp/src/llama-impl.cpp +167 -0
  160. package/src/llama.cpp/src/llama-impl.h +16 -136
  161. package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
  162. package/src/llama.cpp/src/llama-kv-cache.h +218 -0
  163. package/src/llama.cpp/src/llama-mmap.cpp +589 -0
  164. package/src/llama.cpp/src/llama-mmap.h +67 -0
  165. package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
  166. package/src/llama.cpp/src/llama-model-loader.h +167 -0
  167. package/src/llama.cpp/src/llama-model.cpp +3953 -0
  168. package/src/llama.cpp/src/llama-model.h +370 -0
  169. package/src/llama.cpp/src/llama-quant.cpp +934 -0
  170. package/src/llama.cpp/src/llama-quant.h +1 -0
  171. package/src/llama.cpp/src/llama-sampling.cpp +147 -32
  172. package/src/llama.cpp/src/llama-sampling.h +3 -19
  173. package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
  174. package/src/llama.cpp/src/llama-vocab.h +97 -142
  175. package/src/llama.cpp/src/llama.cpp +7160 -20314
  176. package/src/llama.cpp/src/unicode.cpp +8 -3
  177. package/src/llama.cpp/tests/CMakeLists.txt +2 -0
  178. package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
  179. package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
  180. package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
  181. package/src/llama.cpp/tests/test-gguf.cpp +222 -187
  182. package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
  183. package/src/llama.cpp/tests/test-sampling.cpp +0 -1
  184. package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
  185. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
  186. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
@@ -82,6 +82,14 @@ inline std::string get_device_backend_and_type(const sycl::device &device) {
82
82
  return device_type.str();
83
83
  }
84
84
 
85
+ template <typename Ts> struct matrix_info_t {
86
+ oneapi::mkl::transpose transpose_info[2];
87
+ Ts value_info[2];
88
+ std::int64_t size_info[3];
89
+ std::int64_t ld_info[3];
90
+ std::int64_t groupsize_info;
91
+ };
92
+
85
93
  namespace dpct
86
94
  {
87
95
  typedef sycl::queue *queue_ptr;
@@ -1727,26 +1735,13 @@ namespace dpct
1727
1735
  };
1728
1736
 
1729
1737
  template <class Ta, class Tb, class Tc, class Ts>
1730
- inline void gemm_batch_impl(sycl::queue &q, oneapi::mkl::transpose a_trans,
1731
- oneapi::mkl::transpose b_trans, int m, int n, int k,
1732
- const void *alpha, const void **a, int lda,
1733
- const void **b, int ldb, const void *beta, void **c,
1734
- int ldc, int batch_size)
1735
- {
1736
- struct matrix_info_t
1737
- {
1738
- oneapi::mkl::transpose transpose_info[2];
1739
- Ts value_info[2];
1740
- std::int64_t size_info[3];
1741
- std::int64_t ld_info[3];
1742
- std::int64_t groupsize_info;
1743
- };
1744
-
1738
+ inline void gemm_batch_impl(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans,
1739
+ int m, int n, int k, const void * alpha, const void ** a, int lda, const void ** b,
1740
+ int ldb, const void * beta, void ** c, int ldc, int batch_size,
1741
+ matrix_info_t<float> * matrix_info) {
1745
1742
  Ts alpha_value = dpct::get_value(reinterpret_cast<const Ts *>(alpha), q);
1746
1743
  Ts beta_value = dpct::get_value(reinterpret_cast<const Ts *>(beta), q);
1747
1744
 
1748
- matrix_info_t *matrix_info =
1749
- (matrix_info_t *)std::malloc(sizeof(matrix_info_t));
1750
1745
  matrix_info->transpose_info[0] = a_trans;
1751
1746
  matrix_info->transpose_info[1] = b_trans;
1752
1747
  matrix_info->value_info[0] = alpha_value;
@@ -1763,23 +1758,18 @@ namespace dpct
1763
1758
  sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
1764
1759
  oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q }, matrix_info->transpose_info,
1765
1760
  matrix_info->transpose_info + 1, matrix_info->size_info, matrix_info->size_info + 1,
1766
- matrix_info->size_info + 2, matrix_info->value_info, reinterpret_cast<const Ta **>(a),
1767
- matrix_info->ld_info, reinterpret_cast<const Tb **>(b), matrix_info->ld_info + 1,
1768
- matrix_info->value_info + 1, reinterpret_cast<Tc **>(c), matrix_info->ld_info + 2, 1,
1769
- &(matrix_info->groupsize_info));
1761
+ matrix_info->size_info + 2, reinterpret_cast<Ts *>(matrix_info->value_info),
1762
+ reinterpret_cast<const Ta **>(a), matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
1763
+ matrix_info->ld_info + 1, reinterpret_cast<Ts *>(matrix_info->value_info + 1),
1764
+ reinterpret_cast<Tc **>(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
1770
1765
  #else
1771
1766
  sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
1772
1767
  q, matrix_info->transpose_info, matrix_info->transpose_info + 1, matrix_info->size_info,
1773
- matrix_info->size_info + 1, matrix_info->size_info + 2, matrix_info->value_info,
1768
+ matrix_info->size_info + 1, matrix_info->size_info + 2, reinterpret_cast<Ts *>(matrix_info->value_info),
1774
1769
  reinterpret_cast<const Ta **>(a), matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
1775
- matrix_info->ld_info + 1, matrix_info->value_info + 1, reinterpret_cast<Tc **>(c),
1776
- matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
1770
+ matrix_info->ld_info + 1, reinterpret_cast<Ts *>(matrix_info->value_info + 1),
1771
+ reinterpret_cast<Tc **>(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
1777
1772
  #endif
1778
-
1779
- q.submit([&](sycl::handler &cgh)
1780
- {
1781
- cgh.depends_on(e);
1782
- cgh.host_task([=] { std::free(matrix_info); }); });
1783
1773
  }
1784
1774
 
1785
1775
  template <class Ta, class Tb, class Tc, class Ts>
@@ -2422,25 +2412,11 @@ namespace dpct
2422
2412
  /// \param [in] ldc Leading dimension of C.
2423
2413
  /// \param [in] batch_size Specifies the number of matrix multiply operations to perform.
2424
2414
  /// \param [in] scaling_type Data type of the scaling factors.
2425
- inline void gemm_batch(sycl::queue &q, oneapi::mkl::transpose a_trans,
2426
- oneapi::mkl::transpose b_trans, int m, int n, int k,
2427
- const void *alpha, const void *a[],
2428
- library_data_t a_type, int lda, const void *b[],
2429
- library_data_t b_type, int ldb, const void *beta,
2430
- void *c[], library_data_t c_type, int ldc,
2431
- int batch_size, library_data_t scaling_type)
2432
- {
2433
- if (scaling_type == library_data_t::real_float &&
2434
- c_type == library_data_t::complex_float)
2435
- {
2436
- scaling_type = library_data_t::complex_float;
2437
- }
2438
- else if (scaling_type == library_data_t::real_double &&
2439
- c_type == library_data_t::complex_double)
2440
- {
2441
- scaling_type = library_data_t::complex_double;
2442
- }
2443
-
2415
+ inline void gemm_batch(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans, int m,
2416
+ int n, int k, const void * alpha, const void * a[], library_data_t a_type, int lda,
2417
+ const void * b[], library_data_t b_type, int ldb, const void * beta, void * c[],
2418
+ library_data_t c_type, int ldc, int batch_size, library_data_t scaling_type,
2419
+ matrix_info_t<float> * matrix_info) {
2444
2420
  std::uint64_t key =
2445
2421
  detail::get_type_combination_id(a_type, b_type, c_type, scaling_type);
2446
2422
  switch (key)
@@ -2449,48 +2425,24 @@ namespace dpct
2449
2425
  library_data_t::real_float, library_data_t::real_float,
2450
2426
  library_data_t::real_float, library_data_t::real_float):
2451
2427
  {
2452
- detail::gemm_batch_impl<float, float, float, float>(
2453
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2454
- batch_size);
2428
+ detail::gemm_batch_impl<float, float, float, float>(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb,
2429
+ beta, c, ldc, batch_size, matrix_info);
2455
2430
  break;
2456
2431
  }
2457
2432
  case detail::get_type_combination_id(
2458
2433
  library_data_t::real_double, library_data_t::real_double,
2459
2434
  library_data_t::real_double, library_data_t::real_double):
2460
2435
  {
2461
- detail::gemm_batch_impl<double, double, double, double>(
2462
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2463
- batch_size);
2464
- break;
2465
- }
2466
- case detail::get_type_combination_id(
2467
- library_data_t::complex_float, library_data_t::complex_float,
2468
- library_data_t::complex_float, library_data_t::complex_float):
2469
- {
2470
- detail::gemm_batch_impl<std::complex<float>, std::complex<float>,
2471
- std::complex<float>, std::complex<float>>(
2472
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2473
- batch_size);
2474
- break;
2475
- }
2476
- case detail::get_type_combination_id(
2477
- library_data_t::complex_double, library_data_t::complex_double,
2478
- library_data_t::complex_double, library_data_t::complex_double):
2479
- {
2480
- detail::gemm_batch_impl<std::complex<double>, std::complex<double>,
2481
- std::complex<double>, std::complex<double>>(
2482
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2483
- batch_size);
2436
+ detail::gemm_batch_impl<double, double, double, double>(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb,
2437
+ beta, c, ldc, batch_size, matrix_info);
2484
2438
  break;
2485
2439
  }
2486
2440
  case detail::get_type_combination_id(
2487
2441
  library_data_t::real_half, library_data_t::real_half,
2488
2442
  library_data_t::real_half, library_data_t::real_half):
2489
2443
  {
2490
- detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half,
2491
- sycl::half>(q, a_trans, b_trans, m, n, k, alpha,
2492
- a, lda, b, ldb, beta, c, ldc,
2493
- batch_size);
2444
+ detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half, sycl::half>(
2445
+ q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
2494
2446
  break;
2495
2447
  }
2496
2448
  #ifdef __INTEL_MKL__
@@ -2498,19 +2450,16 @@ namespace dpct
2498
2450
  library_data_t::real_bfloat16, library_data_t::real_bfloat16,
2499
2451
  library_data_t::real_bfloat16, library_data_t::real_float):
2500
2452
  {
2501
- detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16,
2502
- oneapi::mkl::bfloat16, float>(
2503
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2504
- batch_size);
2453
+ detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float>(
2454
+ q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
2505
2455
  break;
2506
2456
  }
2507
2457
  case detail::get_type_combination_id(
2508
2458
  library_data_t::real_bfloat16, library_data_t::real_bfloat16,
2509
2459
  library_data_t::real_float, library_data_t::real_float):
2510
2460
  {
2511
- detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float,
2512
- float>(q, a_trans, b_trans, m, n, k, alpha, a, lda,
2513
- b, ldb, beta, c, ldc, batch_size);
2461
+ detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float, float>(
2462
+ q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
2514
2463
  break;
2515
2464
  }
2516
2465
  #endif
@@ -2522,10 +2471,9 @@ namespace dpct
2522
2471
  dpct::get_value(reinterpret_cast<const std::int32_t *>(alpha), q);
2523
2472
  float beta_float =
2524
2473
  dpct::get_value(reinterpret_cast<const std::int32_t *>(beta), q);
2525
- detail::gemm_batch_impl<std::int8_t, std::int8_t, std::int32_t,
2526
- float>(q, a_trans, b_trans, m, n, k, &alpha_float,
2527
- a, lda, b, ldb, &beta_float, c, ldc,
2528
- batch_size);
2474
+ detail::gemm_batch_impl<std::int8_t, std::int8_t, std::int32_t, float>(
2475
+ q, a_trans, b_trans, m, n, k, &alpha_float, a, lda, b, ldb, &beta_float, c, ldc, batch_size,
2476
+ matrix_info);
2529
2477
  break;
2530
2478
  }
2531
2479
  case detail::get_type_combination_id(
@@ -2533,8 +2481,7 @@ namespace dpct
2533
2481
  library_data_t::real_float, library_data_t::real_float):
2534
2482
  {
2535
2483
  detail::gemm_batch_impl<std::int8_t, std::int8_t, float, float>(
2536
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2537
- batch_size);
2484
+ q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
2538
2485
  break;
2539
2486
  }
2540
2487
  case detail::get_type_combination_id(
@@ -2542,8 +2489,7 @@ namespace dpct
2542
2489
  library_data_t::real_float, library_data_t::real_float):
2543
2490
  {
2544
2491
  detail::gemm_batch_impl<sycl::half, sycl::half, float, float>(
2545
- q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
2546
- batch_size);
2492
+ q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
2547
2493
  break;
2548
2494
  }
2549
2495
  case detail::get_type_combination_id(
@@ -2557,8 +2503,7 @@ namespace dpct
2557
2503
  sycl::half alpha_half(alpha_value);
2558
2504
  sycl::half beta_half(beta_value);
2559
2505
  detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half, sycl::half>(
2560
- q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc,
2561
- batch_size);
2506
+ q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc, batch_size, matrix_info);
2562
2507
  break;
2563
2508
  }
2564
2509
  default:
@@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
882
882
  }
883
883
 
884
884
 
885
- void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
885
+ void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
886
886
  GGML_SYCL_DEBUG("call %s\n", __func__);
887
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
887
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
888
888
  GGML_SYCL_DEBUG("call %s done\n", __func__);
889
889
  }
890
890
 
891
- void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
891
+ void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
892
892
  GGML_SYCL_DEBUG("call %s\n", __func__);
893
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
893
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
894
894
  GGML_SYCL_DEBUG("call %s done\n", __func__);
895
895
  }
896
896
 
897
- void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
897
+ void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
898
898
  GGML_SYCL_DEBUG("call %s\n", __func__);
899
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
899
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
900
900
  GGML_SYCL_DEBUG("call %s done\n", __func__);
901
901
  }
902
902
 
903
- void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
903
+ void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
904
904
  GGML_SYCL_DEBUG("call %s\n", __func__);
905
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
905
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
906
906
  GGML_SYCL_DEBUG("call %s done\n", __func__);
907
907
  }
908
908
 
909
- void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
909
+ void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
910
910
  GGML_SYCL_DEBUG("call %s\n", __func__);
911
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
911
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
912
912
  GGML_SYCL_DEBUG("call %s done\n", __func__);
913
913
  }
914
914
 
915
- void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
915
+ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
916
916
  GGML_SYCL_DEBUG("call %s\n", __func__);
917
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
917
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
918
918
  GGML_SYCL_DEBUG("call %s done\n", __func__);
919
919
  }
920
920
 
921
- void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
921
+ void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
922
922
  GGML_SYCL_DEBUG("call %s\n", __func__);
923
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
923
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
924
924
  GGML_SYCL_DEBUG("call %s done\n", __func__);
925
925
  }
926
926
 
927
- void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
927
+ void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
928
928
  GGML_SYCL_DEBUG("call %s\n", __func__);
929
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
929
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
930
930
  GGML_SYCL_DEBUG("call %s done\n", __func__);
931
931
  }
932
932
 
933
- void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
933
+ void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
934
934
  GGML_SYCL_DEBUG("call %s\n", __func__);
935
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
935
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
936
936
  GGML_SYCL_DEBUG("call %s done\n", __func__);
937
937
  }
938
938
 
939
- void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
939
+ void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
940
940
  GGML_SYCL_DEBUG("call %s\n", __func__);
941
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
941
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
942
942
  GGML_SYCL_DEBUG("call %s done\n", __func__);
943
943
  }
944
944
 
945
- void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
945
+ void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
946
946
  GGML_SYCL_DEBUG("call %s\n", __func__);
947
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
947
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
948
948
  GGML_SYCL_DEBUG("call %s done\n", __func__);
949
949
  }
950
950
 
951
- void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
951
+ void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
952
952
  GGML_SYCL_DEBUG("call %s\n", __func__);
953
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
953
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
954
954
  GGML_SYCL_DEBUG("call %s done\n", __func__);
955
955
  }
956
956
 
957
957
 
958
- void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
958
+ void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
959
959
  GGML_SYCL_DEBUG("call %s\n", __func__);
960
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
960
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
961
961
  GGML_SYCL_DEBUG("call %s done\n", __func__);
962
962
  }
963
963
 
964
- void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
964
+ void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
965
965
  GGML_SYCL_DEBUG("call %s\n", __func__);
966
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
966
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
967
967
  GGML_SYCL_DEBUG("call %s done\n", __func__);
968
968
  }
969
969
 
970
- void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
970
+ void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
971
971
  GGML_SYCL_DEBUG("call %s\n", __func__);
972
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
972
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
973
973
  GGML_SYCL_DEBUG("call %s done\n", __func__);
974
974
  }
975
975
 
976
- void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
976
+ void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
977
977
  GGML_SYCL_DEBUG("call %s\n", __func__);
978
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
978
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
979
979
  GGML_SYCL_DEBUG("call %s done\n", __func__);
980
980
  }
981
981
 
982
- void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
982
+ void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
983
983
  GGML_SYCL_DEBUG("call %s\n", __func__);
984
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
984
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
985
985
  GGML_SYCL_DEBUG("call %s done\n", __func__);
986
986
  }
987
987
 
988
- void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
988
+ void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
989
989
  GGML_SYCL_DEBUG("call %s\n", __func__);
990
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
990
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
991
991
  GGML_SYCL_DEBUG("call %s done\n", __func__);
992
992
  }
993
993
 
994
- void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
994
+ void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
995
995
  GGML_SYCL_DEBUG("call %s\n", __func__);
996
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
996
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
997
997
  GGML_SYCL_DEBUG("call %s done\n", __func__);
998
998
  }
999
999
 
1000
- void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
1000
+ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1001
1001
  GGML_SYCL_DEBUG("call %s\n", __func__);
1002
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
1002
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
1003
1003
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1004
1004
  }
1005
1005
 
1006
1006
 
1007
1007
 
1008
- void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
1008
+ void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1009
1009
  GGML_SYCL_DEBUG("call %s\n", __func__);
1010
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
1010
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
1011
1011
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1012
1012
  }
1013
1013
 
1014
- void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
1014
+ void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1015
1015
  GGML_SYCL_DEBUG("call %s\n", __func__);
1016
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
1016
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
1017
1017
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1018
1018
  }
1019
1019
 
1020
- void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
1020
+ void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1021
1021
  GGML_SYCL_DEBUG("call %s\n", __func__);
1022
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
1022
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
1023
1023
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1024
1024
  }
1025
1025
 
1026
- void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
1026
+ void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1027
1027
  GGML_SYCL_DEBUG("call %s\n", __func__);
1028
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
1028
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
1029
1029
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1030
1030
  }
@@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) {
25
25
  }
26
26
 
27
27
 
28
- void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
28
+ void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
29
29
 
30
- void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
30
+ void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
31
31
 
32
- void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
32
+ void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
33
33
 
34
- void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
34
+ void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
35
35
 
36
- void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
36
+ void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
37
37
 
38
- void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
38
+ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
39
39
 
40
- void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
40
+ void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
41
41
 
42
- void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
42
+ void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
43
43
 
44
- void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
44
+ void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
45
45
 
46
- void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
46
+ void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
47
47
 
48
- void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
48
+ void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
49
49
 
50
- void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
50
+ void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
51
51
 
52
- void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
52
+ void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
53
53
 
54
- void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
54
+ void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
55
55
 
56
- void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
56
+ void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
57
57
 
58
- void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
58
+ void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
59
59
 
60
- void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
60
+ void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
61
61
 
62
- void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
62
+ void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
63
63
 
64
- void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
64
+ void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
65
65
 
66
- void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
66
+ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
67
67
 
68
- void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
68
+ void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
69
69
 
70
- void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
70
+ void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
71
71
 
72
- void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
72
+ void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
73
73
 
74
- void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
74
+ void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
75
75
 
76
76
  #endif // GGML_SYCL_ELEMENTWISE_HPP