@fugood/llama.node 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
|
@@ -82,6 +82,14 @@ inline std::string get_device_backend_and_type(const sycl::device &device) {
|
|
|
82
82
|
return device_type.str();
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
template <typename Ts> struct matrix_info_t {
|
|
86
|
+
oneapi::mkl::transpose transpose_info[2];
|
|
87
|
+
Ts value_info[2];
|
|
88
|
+
std::int64_t size_info[3];
|
|
89
|
+
std::int64_t ld_info[3];
|
|
90
|
+
std::int64_t groupsize_info;
|
|
91
|
+
};
|
|
92
|
+
|
|
85
93
|
namespace dpct
|
|
86
94
|
{
|
|
87
95
|
typedef sycl::queue *queue_ptr;
|
|
@@ -1727,26 +1735,13 @@ namespace dpct
|
|
|
1727
1735
|
};
|
|
1728
1736
|
|
|
1729
1737
|
template <class Ta, class Tb, class Tc, class Ts>
|
|
1730
|
-
inline void gemm_batch_impl(sycl::queue &q, oneapi::mkl::transpose a_trans,
|
|
1731
|
-
|
|
1732
|
-
const void *
|
|
1733
|
-
|
|
1734
|
-
int ldc, int batch_size)
|
|
1735
|
-
{
|
|
1736
|
-
struct matrix_info_t
|
|
1737
|
-
{
|
|
1738
|
-
oneapi::mkl::transpose transpose_info[2];
|
|
1739
|
-
Ts value_info[2];
|
|
1740
|
-
std::int64_t size_info[3];
|
|
1741
|
-
std::int64_t ld_info[3];
|
|
1742
|
-
std::int64_t groupsize_info;
|
|
1743
|
-
};
|
|
1744
|
-
|
|
1738
|
+
inline void gemm_batch_impl(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans,
|
|
1739
|
+
int m, int n, int k, const void * alpha, const void ** a, int lda, const void ** b,
|
|
1740
|
+
int ldb, const void * beta, void ** c, int ldc, int batch_size,
|
|
1741
|
+
matrix_info_t<float> * matrix_info) {
|
|
1745
1742
|
Ts alpha_value = dpct::get_value(reinterpret_cast<const Ts *>(alpha), q);
|
|
1746
1743
|
Ts beta_value = dpct::get_value(reinterpret_cast<const Ts *>(beta), q);
|
|
1747
1744
|
|
|
1748
|
-
matrix_info_t *matrix_info =
|
|
1749
|
-
(matrix_info_t *)std::malloc(sizeof(matrix_info_t));
|
|
1750
1745
|
matrix_info->transpose_info[0] = a_trans;
|
|
1751
1746
|
matrix_info->transpose_info[1] = b_trans;
|
|
1752
1747
|
matrix_info->value_info[0] = alpha_value;
|
|
@@ -1763,23 +1758,18 @@ namespace dpct
|
|
|
1763
1758
|
sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
|
|
1764
1759
|
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q }, matrix_info->transpose_info,
|
|
1765
1760
|
matrix_info->transpose_info + 1, matrix_info->size_info, matrix_info->size_info + 1,
|
|
1766
|
-
matrix_info->size_info + 2,
|
|
1767
|
-
matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
|
|
1768
|
-
matrix_info->
|
|
1769
|
-
&(matrix_info->groupsize_info));
|
|
1761
|
+
matrix_info->size_info + 2, reinterpret_cast<Ts *>(matrix_info->value_info),
|
|
1762
|
+
reinterpret_cast<const Ta **>(a), matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
|
|
1763
|
+
matrix_info->ld_info + 1, reinterpret_cast<Ts *>(matrix_info->value_info + 1),
|
|
1764
|
+
reinterpret_cast<Tc **>(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
|
|
1770
1765
|
#else
|
|
1771
1766
|
sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
|
|
1772
1767
|
q, matrix_info->transpose_info, matrix_info->transpose_info + 1, matrix_info->size_info,
|
|
1773
|
-
matrix_info->size_info + 1, matrix_info->size_info + 2, matrix_info->value_info,
|
|
1768
|
+
matrix_info->size_info + 1, matrix_info->size_info + 2, reinterpret_cast<Ts *>(matrix_info->value_info),
|
|
1774
1769
|
reinterpret_cast<const Ta **>(a), matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
|
|
1775
|
-
matrix_info->ld_info + 1, matrix_info->value_info + 1
|
|
1776
|
-
matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
|
|
1770
|
+
matrix_info->ld_info + 1, reinterpret_cast<Ts *>(matrix_info->value_info + 1),
|
|
1771
|
+
reinterpret_cast<Tc **>(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));
|
|
1777
1772
|
#endif
|
|
1778
|
-
|
|
1779
|
-
q.submit([&](sycl::handler &cgh)
|
|
1780
|
-
{
|
|
1781
|
-
cgh.depends_on(e);
|
|
1782
|
-
cgh.host_task([=] { std::free(matrix_info); }); });
|
|
1783
1773
|
}
|
|
1784
1774
|
|
|
1785
1775
|
template <class Ta, class Tb, class Tc, class Ts>
|
|
@@ -2422,25 +2412,11 @@ namespace dpct
|
|
|
2422
2412
|
/// \param [in] ldc Leading dimension of C.
|
|
2423
2413
|
/// \param [in] batch_size Specifies the number of matrix multiply operations to perform.
|
|
2424
2414
|
/// \param [in] scaling_type Data type of the scaling factors.
|
|
2425
|
-
inline void gemm_batch(sycl::queue &q, oneapi::mkl::transpose a_trans,
|
|
2426
|
-
|
|
2427
|
-
const void *
|
|
2428
|
-
library_data_t
|
|
2429
|
-
|
|
2430
|
-
void *c[], library_data_t c_type, int ldc,
|
|
2431
|
-
int batch_size, library_data_t scaling_type)
|
|
2432
|
-
{
|
|
2433
|
-
if (scaling_type == library_data_t::real_float &&
|
|
2434
|
-
c_type == library_data_t::complex_float)
|
|
2435
|
-
{
|
|
2436
|
-
scaling_type = library_data_t::complex_float;
|
|
2437
|
-
}
|
|
2438
|
-
else if (scaling_type == library_data_t::real_double &&
|
|
2439
|
-
c_type == library_data_t::complex_double)
|
|
2440
|
-
{
|
|
2441
|
-
scaling_type = library_data_t::complex_double;
|
|
2442
|
-
}
|
|
2443
|
-
|
|
2415
|
+
inline void gemm_batch(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans, int m,
|
|
2416
|
+
int n, int k, const void * alpha, const void * a[], library_data_t a_type, int lda,
|
|
2417
|
+
const void * b[], library_data_t b_type, int ldb, const void * beta, void * c[],
|
|
2418
|
+
library_data_t c_type, int ldc, int batch_size, library_data_t scaling_type,
|
|
2419
|
+
matrix_info_t<float> * matrix_info) {
|
|
2444
2420
|
std::uint64_t key =
|
|
2445
2421
|
detail::get_type_combination_id(a_type, b_type, c_type, scaling_type);
|
|
2446
2422
|
switch (key)
|
|
@@ -2449,48 +2425,24 @@ namespace dpct
|
|
|
2449
2425
|
library_data_t::real_float, library_data_t::real_float,
|
|
2450
2426
|
library_data_t::real_float, library_data_t::real_float):
|
|
2451
2427
|
{
|
|
2452
|
-
detail::gemm_batch_impl<float, float, float, float>(
|
|
2453
|
-
|
|
2454
|
-
batch_size);
|
|
2428
|
+
detail::gemm_batch_impl<float, float, float, float>(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb,
|
|
2429
|
+
beta, c, ldc, batch_size, matrix_info);
|
|
2455
2430
|
break;
|
|
2456
2431
|
}
|
|
2457
2432
|
case detail::get_type_combination_id(
|
|
2458
2433
|
library_data_t::real_double, library_data_t::real_double,
|
|
2459
2434
|
library_data_t::real_double, library_data_t::real_double):
|
|
2460
2435
|
{
|
|
2461
|
-
detail::gemm_batch_impl<double, double, double, double>(
|
|
2462
|
-
|
|
2463
|
-
batch_size);
|
|
2464
|
-
break;
|
|
2465
|
-
}
|
|
2466
|
-
case detail::get_type_combination_id(
|
|
2467
|
-
library_data_t::complex_float, library_data_t::complex_float,
|
|
2468
|
-
library_data_t::complex_float, library_data_t::complex_float):
|
|
2469
|
-
{
|
|
2470
|
-
detail::gemm_batch_impl<std::complex<float>, std::complex<float>,
|
|
2471
|
-
std::complex<float>, std::complex<float>>(
|
|
2472
|
-
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
|
|
2473
|
-
batch_size);
|
|
2474
|
-
break;
|
|
2475
|
-
}
|
|
2476
|
-
case detail::get_type_combination_id(
|
|
2477
|
-
library_data_t::complex_double, library_data_t::complex_double,
|
|
2478
|
-
library_data_t::complex_double, library_data_t::complex_double):
|
|
2479
|
-
{
|
|
2480
|
-
detail::gemm_batch_impl<std::complex<double>, std::complex<double>,
|
|
2481
|
-
std::complex<double>, std::complex<double>>(
|
|
2482
|
-
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
|
|
2483
|
-
batch_size);
|
|
2436
|
+
detail::gemm_batch_impl<double, double, double, double>(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb,
|
|
2437
|
+
beta, c, ldc, batch_size, matrix_info);
|
|
2484
2438
|
break;
|
|
2485
2439
|
}
|
|
2486
2440
|
case detail::get_type_combination_id(
|
|
2487
2441
|
library_data_t::real_half, library_data_t::real_half,
|
|
2488
2442
|
library_data_t::real_half, library_data_t::real_half):
|
|
2489
2443
|
{
|
|
2490
|
-
detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half,
|
|
2491
|
-
|
|
2492
|
-
a, lda, b, ldb, beta, c, ldc,
|
|
2493
|
-
batch_size);
|
|
2444
|
+
detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half, sycl::half>(
|
|
2445
|
+
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
|
|
2494
2446
|
break;
|
|
2495
2447
|
}
|
|
2496
2448
|
#ifdef __INTEL_MKL__
|
|
@@ -2498,19 +2450,16 @@ namespace dpct
|
|
|
2498
2450
|
library_data_t::real_bfloat16, library_data_t::real_bfloat16,
|
|
2499
2451
|
library_data_t::real_bfloat16, library_data_t::real_float):
|
|
2500
2452
|
{
|
|
2501
|
-
detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16,
|
|
2502
|
-
|
|
2503
|
-
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
|
|
2504
|
-
batch_size);
|
|
2453
|
+
detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float>(
|
|
2454
|
+
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
|
|
2505
2455
|
break;
|
|
2506
2456
|
}
|
|
2507
2457
|
case detail::get_type_combination_id(
|
|
2508
2458
|
library_data_t::real_bfloat16, library_data_t::real_bfloat16,
|
|
2509
2459
|
library_data_t::real_float, library_data_t::real_float):
|
|
2510
2460
|
{
|
|
2511
|
-
detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float,
|
|
2512
|
-
|
|
2513
|
-
b, ldb, beta, c, ldc, batch_size);
|
|
2461
|
+
detail::gemm_batch_impl<oneapi::mkl::bfloat16, oneapi::mkl::bfloat16, float, float>(
|
|
2462
|
+
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
|
|
2514
2463
|
break;
|
|
2515
2464
|
}
|
|
2516
2465
|
#endif
|
|
@@ -2522,10 +2471,9 @@ namespace dpct
|
|
|
2522
2471
|
dpct::get_value(reinterpret_cast<const std::int32_t *>(alpha), q);
|
|
2523
2472
|
float beta_float =
|
|
2524
2473
|
dpct::get_value(reinterpret_cast<const std::int32_t *>(beta), q);
|
|
2525
|
-
detail::gemm_batch_impl<std::int8_t, std::int8_t, std::int32_t,
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
batch_size);
|
|
2474
|
+
detail::gemm_batch_impl<std::int8_t, std::int8_t, std::int32_t, float>(
|
|
2475
|
+
q, a_trans, b_trans, m, n, k, &alpha_float, a, lda, b, ldb, &beta_float, c, ldc, batch_size,
|
|
2476
|
+
matrix_info);
|
|
2529
2477
|
break;
|
|
2530
2478
|
}
|
|
2531
2479
|
case detail::get_type_combination_id(
|
|
@@ -2533,8 +2481,7 @@ namespace dpct
|
|
|
2533
2481
|
library_data_t::real_float, library_data_t::real_float):
|
|
2534
2482
|
{
|
|
2535
2483
|
detail::gemm_batch_impl<std::int8_t, std::int8_t, float, float>(
|
|
2536
|
-
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
|
|
2537
|
-
batch_size);
|
|
2484
|
+
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
|
|
2538
2485
|
break;
|
|
2539
2486
|
}
|
|
2540
2487
|
case detail::get_type_combination_id(
|
|
@@ -2542,8 +2489,7 @@ namespace dpct
|
|
|
2542
2489
|
library_data_t::real_float, library_data_t::real_float):
|
|
2543
2490
|
{
|
|
2544
2491
|
detail::gemm_batch_impl<sycl::half, sycl::half, float, float>(
|
|
2545
|
-
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
|
|
2546
|
-
batch_size);
|
|
2492
|
+
q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info);
|
|
2547
2493
|
break;
|
|
2548
2494
|
}
|
|
2549
2495
|
case detail::get_type_combination_id(
|
|
@@ -2557,8 +2503,7 @@ namespace dpct
|
|
|
2557
2503
|
sycl::half alpha_half(alpha_value);
|
|
2558
2504
|
sycl::half beta_half(beta_value);
|
|
2559
2505
|
detail::gemm_batch_impl<sycl::half, sycl::half, sycl::half, sycl::half>(
|
|
2560
|
-
q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc,
|
|
2561
|
-
batch_size);
|
|
2506
|
+
q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc, batch_size, matrix_info);
|
|
2562
2507
|
break;
|
|
2563
2508
|
}
|
|
2564
2509
|
default:
|
|
@@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
882
882
|
}
|
|
883
883
|
|
|
884
884
|
|
|
885
|
-
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx,
|
|
885
|
+
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
886
886
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
887
|
-
ggml_sycl_op_flatten(ctx,
|
|
887
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
|
|
888
888
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
889
889
|
}
|
|
890
890
|
|
|
891
|
-
void ggml_sycl_sin(ggml_backend_sycl_context & ctx,
|
|
891
|
+
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
892
892
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
893
|
-
ggml_sycl_op_flatten(ctx,
|
|
893
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
|
|
894
894
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
895
895
|
}
|
|
896
896
|
|
|
897
|
-
void ggml_sycl_cos(ggml_backend_sycl_context & ctx,
|
|
897
|
+
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
898
898
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
899
|
-
ggml_sycl_op_flatten(ctx,
|
|
899
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
|
|
900
900
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
901
901
|
}
|
|
902
902
|
|
|
903
|
-
void ggml_sycl_acc(ggml_backend_sycl_context & ctx,
|
|
903
|
+
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
904
904
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
905
|
-
ggml_sycl_op_flatten(ctx,
|
|
905
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
|
|
906
906
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
907
907
|
}
|
|
908
908
|
|
|
909
|
-
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx,
|
|
909
|
+
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
910
910
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
911
|
-
ggml_sycl_op_flatten(ctx,
|
|
911
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
|
|
912
912
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
913
913
|
}
|
|
914
914
|
|
|
915
|
-
void ggml_sycl_silu(ggml_backend_sycl_context & ctx,
|
|
915
|
+
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
916
916
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
917
|
-
ggml_sycl_op_flatten(ctx,
|
|
917
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
|
|
918
918
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
919
919
|
}
|
|
920
920
|
|
|
921
|
-
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx,
|
|
921
|
+
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
922
922
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
923
|
-
ggml_sycl_op_flatten(ctx,
|
|
923
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
|
|
924
924
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
925
925
|
}
|
|
926
926
|
|
|
927
|
-
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx,
|
|
927
|
+
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
928
928
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
929
|
-
ggml_sycl_op_flatten(ctx,
|
|
929
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
|
|
930
930
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
931
931
|
}
|
|
932
932
|
|
|
933
|
-
void ggml_sycl_relu(ggml_backend_sycl_context & ctx,
|
|
933
|
+
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
934
934
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
935
|
-
ggml_sycl_op_flatten(ctx,
|
|
935
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
|
|
936
936
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
937
937
|
}
|
|
938
938
|
|
|
939
|
-
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx,
|
|
939
|
+
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
940
940
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
941
|
-
ggml_sycl_op_flatten(ctx,
|
|
941
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
|
|
942
942
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
943
943
|
}
|
|
944
944
|
|
|
945
|
-
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx,
|
|
945
|
+
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
946
946
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
947
|
-
ggml_sycl_op_flatten(ctx,
|
|
947
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
|
|
948
948
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
949
949
|
}
|
|
950
950
|
|
|
951
|
-
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx,
|
|
951
|
+
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
952
952
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
953
|
-
ggml_sycl_op_flatten(ctx,
|
|
953
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
|
|
954
954
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
955
955
|
}
|
|
956
956
|
|
|
957
957
|
|
|
958
|
-
void ggml_sycl_exp(ggml_backend_sycl_context & ctx,
|
|
958
|
+
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
959
959
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
960
|
-
ggml_sycl_op_flatten(ctx,
|
|
960
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
|
|
961
961
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
962
962
|
}
|
|
963
963
|
|
|
964
|
-
void ggml_sycl_log(ggml_backend_sycl_context & ctx,
|
|
964
|
+
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
965
965
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
966
|
-
ggml_sycl_op_flatten(ctx,
|
|
966
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
|
|
967
967
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
968
968
|
}
|
|
969
969
|
|
|
970
|
-
void ggml_sycl_neg(ggml_backend_sycl_context & ctx,
|
|
970
|
+
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
971
971
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
972
|
-
ggml_sycl_op_flatten(ctx,
|
|
972
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
|
|
973
973
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
974
974
|
}
|
|
975
975
|
|
|
976
|
-
void ggml_sycl_step(ggml_backend_sycl_context & ctx,
|
|
976
|
+
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
977
977
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
978
|
-
ggml_sycl_op_flatten(ctx,
|
|
978
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
|
|
979
979
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
980
980
|
}
|
|
981
981
|
|
|
982
|
-
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx,
|
|
982
|
+
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
983
983
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
984
|
-
ggml_sycl_op_flatten(ctx,
|
|
984
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
|
|
985
985
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
986
986
|
}
|
|
987
987
|
|
|
988
|
-
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx,
|
|
988
|
+
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
989
989
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
990
|
-
ggml_sycl_op_flatten(ctx,
|
|
990
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
|
|
991
991
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
992
992
|
}
|
|
993
993
|
|
|
994
|
-
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx,
|
|
994
|
+
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
995
995
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
996
|
-
ggml_sycl_op_flatten(ctx,
|
|
996
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
|
|
997
997
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
998
998
|
}
|
|
999
999
|
|
|
1000
|
-
void ggml_sycl_pad(ggml_backend_sycl_context & ctx,
|
|
1000
|
+
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1001
1001
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
1002
|
-
ggml_sycl_op_flatten(ctx,
|
|
1002
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
|
|
1003
1003
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1004
1004
|
}
|
|
1005
1005
|
|
|
1006
1006
|
|
|
1007
1007
|
|
|
1008
|
-
void ggml_sycl_add(ggml_backend_sycl_context & ctx,
|
|
1008
|
+
void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1009
1009
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
1010
|
-
ggml_sycl_op_flatten(ctx,
|
|
1010
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
|
|
1011
1011
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1012
1012
|
}
|
|
1013
1013
|
|
|
1014
|
-
void ggml_sycl_sub(ggml_backend_sycl_context & ctx,
|
|
1014
|
+
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1015
1015
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
1016
|
-
ggml_sycl_op_flatten(ctx,
|
|
1016
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
|
|
1017
1017
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1018
1018
|
}
|
|
1019
1019
|
|
|
1020
|
-
void ggml_sycl_mul(ggml_backend_sycl_context & ctx,
|
|
1020
|
+
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1021
1021
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
1022
|
-
ggml_sycl_op_flatten(ctx,
|
|
1022
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
|
|
1023
1023
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1024
1024
|
}
|
|
1025
1025
|
|
|
1026
|
-
void ggml_sycl_div(ggml_backend_sycl_context & ctx,
|
|
1026
|
+
void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
|
1027
1027
|
GGML_SYCL_DEBUG("call %s\n", __func__);
|
|
1028
|
-
ggml_sycl_op_flatten(ctx,
|
|
1028
|
+
ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
|
|
1029
1029
|
GGML_SYCL_DEBUG("call %s done\n", __func__);
|
|
1030
1030
|
}
|
|
@@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx,
|
|
28
|
+
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
29
29
|
|
|
30
|
-
void ggml_sycl_sin(ggml_backend_sycl_context & ctx,
|
|
30
|
+
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
31
31
|
|
|
32
|
-
void ggml_sycl_cos(ggml_backend_sycl_context & ctx,
|
|
32
|
+
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
33
33
|
|
|
34
|
-
void ggml_sycl_acc(ggml_backend_sycl_context & ctx,
|
|
34
|
+
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
35
35
|
|
|
36
|
-
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx,
|
|
36
|
+
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
37
37
|
|
|
38
|
-
void ggml_sycl_silu(ggml_backend_sycl_context & ctx,
|
|
38
|
+
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
39
39
|
|
|
40
|
-
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx,
|
|
40
|
+
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
41
41
|
|
|
42
|
-
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx,
|
|
42
|
+
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
43
43
|
|
|
44
|
-
void ggml_sycl_relu(ggml_backend_sycl_context & ctx,
|
|
44
|
+
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
45
45
|
|
|
46
|
-
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx,
|
|
46
|
+
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
47
47
|
|
|
48
|
-
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx,
|
|
48
|
+
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
49
49
|
|
|
50
|
-
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx,
|
|
50
|
+
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
51
51
|
|
|
52
|
-
void ggml_sycl_exp(ggml_backend_sycl_context & ctx,
|
|
52
|
+
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
53
53
|
|
|
54
|
-
void ggml_sycl_log(ggml_backend_sycl_context & ctx,
|
|
54
|
+
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
55
55
|
|
|
56
|
-
void ggml_sycl_neg(ggml_backend_sycl_context & ctx,
|
|
56
|
+
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
57
57
|
|
|
58
|
-
void ggml_sycl_step(ggml_backend_sycl_context & ctx,
|
|
58
|
+
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
59
59
|
|
|
60
|
-
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx,
|
|
60
|
+
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
61
61
|
|
|
62
|
-
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx,
|
|
62
|
+
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
63
63
|
|
|
64
|
-
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx,
|
|
64
|
+
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
65
65
|
|
|
66
|
-
void ggml_sycl_pad(ggml_backend_sycl_context & ctx,
|
|
66
|
+
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
67
67
|
|
|
68
|
-
void ggml_sycl_add(ggml_backend_sycl_context & ctx,
|
|
68
|
+
void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
69
69
|
|
|
70
|
-
void ggml_sycl_sub(ggml_backend_sycl_context & ctx,
|
|
70
|
+
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
71
71
|
|
|
72
|
-
void ggml_sycl_mul(ggml_backend_sycl_context & ctx,
|
|
72
|
+
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
73
73
|
|
|
74
|
-
void ggml_sycl_div(ggml_backend_sycl_context & ctx,
|
|
74
|
+
void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
|
75
75
|
|
|
76
76
|
#endif // GGML_SYCL_ELEMENTWISE_HPP
|