mkl-devel-dpcpp 2025.0.1__py2.py3-none-win_amd64.whl → 2025.2.0__py2.py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkl-devel-dpcpp might be problematic. Click here for more details.
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/METADATA +4 -3
- mkl_devel_dpcpp-2025.2.0.dist-info/RECORD +15 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -26
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -880
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/types.hpp +0 -60
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -1240
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas.hpp +0 -33
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dft.hpp +0 -253
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dfti.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/exceptions.hpp +0 -110
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -67
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -68
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -177
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/export.hpp +0 -25
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -55
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -75
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/lapack.hpp +0 -1095
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -106
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack.hpp +0 -23
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -48
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -89
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +0 -464
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -71
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -43
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -54
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +0 -285
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -270
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -105
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -117
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +0 -223
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -118
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +0 -266
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -125
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -385
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -3668
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -141
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -552
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -355
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -58
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -51
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -289
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +0 -183
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +0 -637
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engines.hpp +0 -187
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -59
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/types.hpp +0 -74
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device.hpp +0 -29
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/distributions.hpp +0 -1913
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -788
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -163
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +0 -111
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +0 -446
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +0 -193
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas.hpp +0 -32
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/stats.hpp +0 -356
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/types.hpp +0 -321
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -3529
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -280
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -1059
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -861
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -4608
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -8963
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -460
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/span.hpp +0 -3813
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -3581
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm.hpp +0 -30
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl.hpp +0 -34
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.dist-info/RECORD +0 -90
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/LICENSE.txt +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/WHEEL +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,1240 +0,0 @@
|
|
|
1
|
-
/*******************************************************************************
|
|
2
|
-
* Copyright 2018-2023 Intel Corporation.
|
|
3
|
-
*
|
|
4
|
-
* This software and the related documents are Intel copyrighted materials, and
|
|
5
|
-
* your use of them is governed by the express license under which they were
|
|
6
|
-
* provided to you (License). Unless the License provides otherwise, you may not
|
|
7
|
-
* use, modify, copy, publish, distribute, disclose or transmit this software or
|
|
8
|
-
* the related documents without Intel's prior written permission.
|
|
9
|
-
*
|
|
10
|
-
* This software and the related documents are provided as is, with no express
|
|
11
|
-
* or implied warranties, other than those that are expressly stated in the
|
|
12
|
-
* License.
|
|
13
|
-
*******************************************************************************/
|
|
14
|
-
|
|
15
|
-
// Level 3
|
|
16
|
-
|
|
17
|
-
#ifndef ONEMKL_INLINE_DECLARE
|
|
18
|
-
#define ONEMKL_INLINE_DECLARE static inline
|
|
19
|
-
#endif
|
|
20
|
-
|
|
21
|
-
#define ONEMKL_DECLARE_GEMM(Ta, Tb, Tc, Ts) \
|
|
22
|
-
DLL_EXPORT sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, \
|
|
23
|
-
std::int64_t m, std::int64_t n, std::int64_t k, \
|
|
24
|
-
value_or_pointer<Ts> alpha, const Ta *a, std::int64_t lda, \
|
|
25
|
-
const Tb *b, std::int64_t ldb, \
|
|
26
|
-
value_or_pointer<Ts> beta, Tc *c, std::int64_t ldc, \
|
|
27
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
28
|
-
ONEMKL_INLINE_DECLARE sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, \
|
|
29
|
-
std::int64_t m, std::int64_t n, std::int64_t k, \
|
|
30
|
-
value_or_pointer<Ts> alpha, const Ta *a, std::int64_t lda, \
|
|
31
|
-
const Tb *b, std::int64_t ldb, \
|
|
32
|
-
value_or_pointer<Ts> beta, Tc *c, std::int64_t ldc, \
|
|
33
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
34
|
-
{ \
|
|
35
|
-
return gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
ONEMKL_DECLARE_GEMM(float, float, float, float)
|
|
39
|
-
ONEMKL_DECLARE_GEMM(double, double, double, double)
|
|
40
|
-
ONEMKL_DECLARE_GEMM(std::complex<float>, std::complex<float>, std::complex<float>, std::complex<float>)
|
|
41
|
-
ONEMKL_DECLARE_GEMM(std::complex<double>, std::complex<double>, std::complex<double>, std::complex<double>)
|
|
42
|
-
ONEMKL_DECLARE_GEMM(sycl::half, sycl::half, sycl::half, sycl::half)
|
|
43
|
-
ONEMKL_DECLARE_GEMM(sycl::half, sycl::half, float, float)
|
|
44
|
-
ONEMKL_DECLARE_GEMM(bfloat16, bfloat16, bfloat16, float)
|
|
45
|
-
ONEMKL_DECLARE_GEMM(bfloat16, bfloat16, float, float)
|
|
46
|
-
ONEMKL_DECLARE_GEMM(std::int8_t, std::int8_t, std::int32_t, float)
|
|
47
|
-
ONEMKL_DECLARE_GEMM(std::int8_t, std::int8_t, float, float)
|
|
48
|
-
|
|
49
|
-
#undef ONEMKL_DECLARE_GEMM
|
|
50
|
-
|
|
51
|
-
#define ONEMKL_DECLARE_SYMM(T) \
|
|
52
|
-
DLL_EXPORT sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, \
|
|
53
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
54
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
55
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
56
|
-
ONEMKL_INLINE_DECLARE sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, \
|
|
57
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
58
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, \
|
|
59
|
-
std::int64_t ldc, const std::vector<sycl::event> &dependencies = {}) \
|
|
60
|
-
{ \
|
|
61
|
-
return symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
ONEMKL_DECLARE_SYMM(float)
|
|
65
|
-
ONEMKL_DECLARE_SYMM(double)
|
|
66
|
-
ONEMKL_DECLARE_SYMM(std::complex<float>)
|
|
67
|
-
ONEMKL_DECLARE_SYMM(std::complex<double>)
|
|
68
|
-
|
|
69
|
-
#undef ONEMKL_DECLARE_SYMM
|
|
70
|
-
|
|
71
|
-
#define ONEMKL_DECLARE_HEMM(T) \
|
|
72
|
-
DLL_EXPORT sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, \
|
|
73
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
74
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
75
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
76
|
-
ONEMKL_INLINE_DECLARE sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, \
|
|
77
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
78
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, \
|
|
79
|
-
std::int64_t ldc, const std::vector<sycl::event> &dependencies = {}) \
|
|
80
|
-
{ \
|
|
81
|
-
return hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
ONEMKL_DECLARE_HEMM(std::complex<float>)
|
|
85
|
-
ONEMKL_DECLARE_HEMM(std::complex<double>)
|
|
86
|
-
|
|
87
|
-
#undef ONEMKL_DECLARE_HEMM
|
|
88
|
-
|
|
89
|
-
#define ONEMKL_DECLARE_SYRK(T) \
|
|
90
|
-
DLL_EXPORT sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
91
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
92
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
93
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
94
|
-
ONEMKL_INLINE_DECLARE sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
95
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
96
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
97
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
98
|
-
{ \
|
|
99
|
-
return syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
ONEMKL_DECLARE_SYRK(float)
|
|
103
|
-
ONEMKL_DECLARE_SYRK(double)
|
|
104
|
-
ONEMKL_DECLARE_SYRK(std::complex<float>)
|
|
105
|
-
ONEMKL_DECLARE_SYRK(std::complex<double>)
|
|
106
|
-
|
|
107
|
-
#undef ONEMKL_DECLARE_SYRK
|
|
108
|
-
|
|
109
|
-
#define ONEMKL_DECLARE_HERK(T, Treal) \
|
|
110
|
-
DLL_EXPORT sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
111
|
-
value_or_pointer<Treal> alpha, const T *a, std::int64_t lda, \
|
|
112
|
-
value_or_pointer<Treal> beta, T *c, std::int64_t ldc, \
|
|
113
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
114
|
-
ONEMKL_INLINE_DECLARE sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
115
|
-
value_or_pointer<Treal> alpha, const T *a, std::int64_t lda, \
|
|
116
|
-
value_or_pointer<Treal> beta, T *c, std::int64_t ldc, \
|
|
117
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
118
|
-
{ \
|
|
119
|
-
return herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
ONEMKL_DECLARE_HERK(std::complex<float>, float)
|
|
123
|
-
ONEMKL_DECLARE_HERK(std::complex<double>, double)
|
|
124
|
-
|
|
125
|
-
#undef ONEMKL_DECLARE_HERK
|
|
126
|
-
|
|
127
|
-
#define ONEMKL_DECLARE_SYR2K(T) \
|
|
128
|
-
DLL_EXPORT sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
129
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
130
|
-
const T *b, std::int64_t ldb, \
|
|
131
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
132
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
133
|
-
ONEMKL_INLINE_DECLARE sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
134
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
135
|
-
const T *b, std::int64_t ldb, \
|
|
136
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
137
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
138
|
-
{ \
|
|
139
|
-
return syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
ONEMKL_DECLARE_SYR2K(float)
|
|
143
|
-
ONEMKL_DECLARE_SYR2K(double)
|
|
144
|
-
ONEMKL_DECLARE_SYR2K(std::complex<float>)
|
|
145
|
-
ONEMKL_DECLARE_SYR2K(std::complex<double>)
|
|
146
|
-
|
|
147
|
-
#undef ONEMKL_DECLARE_SYR2K
|
|
148
|
-
|
|
149
|
-
#define ONEMKL_DECLARE_HER2K(T, Treal) \
|
|
150
|
-
DLL_EXPORT sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
151
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
152
|
-
const T *b, std::int64_t ldb, \
|
|
153
|
-
value_or_pointer<Treal> beta, T *c, std::int64_t ldc, \
|
|
154
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
155
|
-
ONEMKL_INLINE_DECLARE sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
156
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
157
|
-
const T *b, std::int64_t ldb, \
|
|
158
|
-
value_or_pointer<Treal> beta, T *c, std::int64_t ldc, \
|
|
159
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
160
|
-
{ \
|
|
161
|
-
return her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
ONEMKL_DECLARE_HER2K(std::complex<float>, float)
|
|
165
|
-
ONEMKL_DECLARE_HER2K(std::complex<double>, double)
|
|
166
|
-
|
|
167
|
-
#undef ONEMKL_DECLARE_HER2K
|
|
168
|
-
|
|
169
|
-
#define ONEMKL_DECLARE_TRMM(T) \
|
|
170
|
-
DLL_EXPORT sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
171
|
-
std::int64_t m, std::int64_t n, \
|
|
172
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
173
|
-
T *b, std::int64_t ldb, \
|
|
174
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
175
|
-
ONEMKL_INLINE_DECLARE sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
176
|
-
std::int64_t m, std::int64_t n, \
|
|
177
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
178
|
-
T *b, std::int64_t ldb, \
|
|
179
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
180
|
-
{ \
|
|
181
|
-
return trmm(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
ONEMKL_DECLARE_TRMM(float)
|
|
185
|
-
ONEMKL_DECLARE_TRMM(double)
|
|
186
|
-
ONEMKL_DECLARE_TRMM(std::complex<float>)
|
|
187
|
-
ONEMKL_DECLARE_TRMM(std::complex<double>)
|
|
188
|
-
|
|
189
|
-
#undef ONEMKL_DECLARE_TRMM
|
|
190
|
-
|
|
191
|
-
#define ONEMKL_DECLARE_TRMM_OOP(T) \
|
|
192
|
-
DLL_EXPORT sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \
|
|
193
|
-
diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
194
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
195
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
196
|
-
ONEMKL_INLINE_DECLARE sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \
|
|
197
|
-
diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
198
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
199
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
200
|
-
{ \
|
|
201
|
-
return trmm(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, \
|
|
202
|
-
beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
ONEMKL_DECLARE_TRMM_OOP(float)
|
|
206
|
-
ONEMKL_DECLARE_TRMM_OOP(double)
|
|
207
|
-
ONEMKL_DECLARE_TRMM_OOP(std::complex<float>)
|
|
208
|
-
ONEMKL_DECLARE_TRMM_OOP(std::complex<double>)
|
|
209
|
-
|
|
210
|
-
#undef ONEMKL_DECLARE_TRMM_OOP
|
|
211
|
-
|
|
212
|
-
#define ONEMKL_DECLARE_TRSM(T) \
|
|
213
|
-
DLL_EXPORT sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
214
|
-
std::int64_t m, std::int64_t n, \
|
|
215
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
216
|
-
T *b, std::int64_t ldb, \
|
|
217
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
218
|
-
ONEMKL_INLINE_DECLARE sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
219
|
-
std::int64_t m, std::int64_t n, \
|
|
220
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
221
|
-
T *b, std::int64_t ldb, \
|
|
222
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
223
|
-
{ \
|
|
224
|
-
return trsm(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
ONEMKL_DECLARE_TRSM(float)
|
|
228
|
-
ONEMKL_DECLARE_TRSM(double)
|
|
229
|
-
ONEMKL_DECLARE_TRSM(std::complex<float>)
|
|
230
|
-
ONEMKL_DECLARE_TRSM(std::complex<double>)
|
|
231
|
-
|
|
232
|
-
#undef ONEMKL_DECLARE_TRSM
|
|
233
|
-
|
|
234
|
-
#define ONEMKL_DECLARE_TRSM_OOP(T) \
|
|
235
|
-
DLL_EXPORT sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \
|
|
236
|
-
diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
237
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
238
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
239
|
-
ONEMKL_INLINE_DECLARE sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \
|
|
240
|
-
diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
241
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, std::int64_t ldc, \
|
|
242
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
243
|
-
{ \
|
|
244
|
-
return trsm(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, \
|
|
245
|
-
beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
ONEMKL_DECLARE_TRSM_OOP(float)
|
|
249
|
-
ONEMKL_DECLARE_TRSM_OOP(double)
|
|
250
|
-
ONEMKL_DECLARE_TRSM_OOP(std::complex<float>)
|
|
251
|
-
ONEMKL_DECLARE_TRSM_OOP(std::complex<double>)
|
|
252
|
-
|
|
253
|
-
#undef ONEMKL_DECLARE_TRSM_OOP
|
|
254
|
-
|
|
255
|
-
// Level 2
|
|
256
|
-
|
|
257
|
-
#define ONEMKL_DECLARE_DGMM(T) \
|
|
258
|
-
DLL_EXPORT sycl::event dgmm(sycl::queue &queue, side left_right, \
|
|
259
|
-
std::int64_t m, std::int64_t n, \
|
|
260
|
-
const T *a, std::int64_t lda, \
|
|
261
|
-
const T *x, std::int64_t incx, \
|
|
262
|
-
T *c, std::int64_t ldc, const std::vector<sycl::event> &dependencies = {});
|
|
263
|
-
|
|
264
|
-
ONEMKL_DECLARE_DGMM(float)
|
|
265
|
-
ONEMKL_DECLARE_DGMM(double)
|
|
266
|
-
ONEMKL_DECLARE_DGMM(std::complex<float>)
|
|
267
|
-
ONEMKL_DECLARE_DGMM(std::complex<double>)
|
|
268
|
-
|
|
269
|
-
#undef ONEMKL_DECLARE_DGMM
|
|
270
|
-
|
|
271
|
-
#define ONEMKL_DECLARE_GEMV(T) \
|
|
272
|
-
DLL_EXPORT sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, \
|
|
273
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
274
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
275
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
276
|
-
|
|
277
|
-
ONEMKL_DECLARE_GEMV(float)
|
|
278
|
-
ONEMKL_DECLARE_GEMV(double)
|
|
279
|
-
ONEMKL_DECLARE_GEMV(std::complex<float>)
|
|
280
|
-
ONEMKL_DECLARE_GEMV(std::complex<double>)
|
|
281
|
-
|
|
282
|
-
#undef ONEMKL_DECLARE_GEMV
|
|
283
|
-
|
|
284
|
-
#define ONEMKL_DECLARE_GBMV(T) \
|
|
285
|
-
DLL_EXPORT sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, \
|
|
286
|
-
std::int64_t kl, std::int64_t ku, value_or_pointer<T> alpha, \
|
|
287
|
-
const T *a, std::int64_t lda, \
|
|
288
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
289
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
290
|
-
|
|
291
|
-
ONEMKL_DECLARE_GBMV(float)
|
|
292
|
-
ONEMKL_DECLARE_GBMV(double)
|
|
293
|
-
ONEMKL_DECLARE_GBMV(std::complex<float>)
|
|
294
|
-
ONEMKL_DECLARE_GBMV(std::complex<double>)
|
|
295
|
-
|
|
296
|
-
#undef ONEMKL_DECLARE_GBMV
|
|
297
|
-
|
|
298
|
-
#define ONEMKL_DECLARE_GER(T) \
|
|
299
|
-
DLL_EXPORT sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, \
|
|
300
|
-
const T *x, std::int64_t incx, const T *y, std::int64_t incy, \
|
|
301
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
302
|
-
|
|
303
|
-
ONEMKL_DECLARE_GER(float)
|
|
304
|
-
ONEMKL_DECLARE_GER(double)
|
|
305
|
-
|
|
306
|
-
#undef ONEMKL_DECLARE_GER
|
|
307
|
-
|
|
308
|
-
#define ONEMKL_DECLARE_GERC(T) \
|
|
309
|
-
DLL_EXPORT sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, \
|
|
310
|
-
const T *x, std::int64_t incx, \
|
|
311
|
-
const T *y, std::int64_t incy, \
|
|
312
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
313
|
-
|
|
314
|
-
ONEMKL_DECLARE_GERC(std::complex<float>)
|
|
315
|
-
ONEMKL_DECLARE_GERC(std::complex<double>)
|
|
316
|
-
|
|
317
|
-
#undef ONEMKL_DECLARE_GERC
|
|
318
|
-
|
|
319
|
-
#define ONEMKL_DECLARE_GERU(T) \
|
|
320
|
-
DLL_EXPORT sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, \
|
|
321
|
-
const T *x, std::int64_t incx, \
|
|
322
|
-
const T *y, std::int64_t incy, \
|
|
323
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
324
|
-
|
|
325
|
-
ONEMKL_DECLARE_GERU(std::complex<float>)
|
|
326
|
-
ONEMKL_DECLARE_GERU(std::complex<double>)
|
|
327
|
-
|
|
328
|
-
#undef ONEMKL_DECLARE_GERU
|
|
329
|
-
|
|
330
|
-
#define ONEMKL_DECLARE_HBMV(T) \
|
|
331
|
-
DLL_EXPORT sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, \
|
|
332
|
-
std::int64_t k, value_or_pointer<T> alpha, \
|
|
333
|
-
const T *a, std::int64_t lda, \
|
|
334
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
335
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
336
|
-
|
|
337
|
-
ONEMKL_DECLARE_HBMV(std::complex<float>)
|
|
338
|
-
ONEMKL_DECLARE_HBMV(std::complex<double>)
|
|
339
|
-
|
|
340
|
-
#undef ONEMKL_DECLARE_HBMV
|
|
341
|
-
|
|
342
|
-
#define ONEMKL_DECLARE_HEMV(T) \
|
|
343
|
-
DLL_EXPORT sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
344
|
-
const T *a, std::int64_t lda, \
|
|
345
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
346
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
347
|
-
|
|
348
|
-
ONEMKL_DECLARE_HEMV(std::complex<float>)
|
|
349
|
-
ONEMKL_DECLARE_HEMV(std::complex<double>)
|
|
350
|
-
|
|
351
|
-
#undef ONEMKL_DECLARE_HEMV
|
|
352
|
-
|
|
353
|
-
#define ONEMKL_DECLARE_HER(T, Treal) \
|
|
354
|
-
DLL_EXPORT sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<Treal> alpha, \
|
|
355
|
-
const T *x, std::int64_t incx, \
|
|
356
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
357
|
-
|
|
358
|
-
ONEMKL_DECLARE_HER(std::complex<float>, float)
|
|
359
|
-
ONEMKL_DECLARE_HER(std::complex<double>, double)
|
|
360
|
-
|
|
361
|
-
#undef ONEMKL_DECLARE_HER
|
|
362
|
-
|
|
363
|
-
#define ONEMKL_DECLARE_HER2(T) \
|
|
364
|
-
DLL_EXPORT sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
365
|
-
const T *x, std::int64_t incx, \
|
|
366
|
-
const T *y, std::int64_t incy, \
|
|
367
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
368
|
-
|
|
369
|
-
ONEMKL_DECLARE_HER2(std::complex<float>)
|
|
370
|
-
ONEMKL_DECLARE_HER2(std::complex<double>)
|
|
371
|
-
|
|
372
|
-
#undef ONEMKL_DECLARE_HER2
|
|
373
|
-
|
|
374
|
-
#define ONEMKL_DECLARE_HPMV(T) \
|
|
375
|
-
DLL_EXPORT sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
376
|
-
const T *a, \
|
|
377
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
378
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
379
|
-
|
|
380
|
-
ONEMKL_DECLARE_HPMV(std::complex<float>)
|
|
381
|
-
ONEMKL_DECLARE_HPMV(std::complex<double>)
|
|
382
|
-
|
|
383
|
-
#undef ONEMKL_DECLARE_HPMV
|
|
384
|
-
|
|
385
|
-
#define ONEMKL_DECLARE_HPR(T, Treal) \
|
|
386
|
-
DLL_EXPORT sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<Treal> alpha, \
|
|
387
|
-
const T *x, std::int64_t incx, \
|
|
388
|
-
T *a, const std::vector<sycl::event> &dependencies = {});
|
|
389
|
-
|
|
390
|
-
ONEMKL_DECLARE_HPR(std::complex<float>, float)
|
|
391
|
-
ONEMKL_DECLARE_HPR(std::complex<double>, double)
|
|
392
|
-
|
|
393
|
-
#undef ONEMKL_DECLARE_HPR
|
|
394
|
-
|
|
395
|
-
#define ONEMKL_DECLARE_HPR2(T) \
|
|
396
|
-
DLL_EXPORT sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
397
|
-
const T *x, std::int64_t incx, \
|
|
398
|
-
const T *y, std::int64_t incy, \
|
|
399
|
-
T *a, const std::vector<sycl::event> &dependencies = {});
|
|
400
|
-
|
|
401
|
-
ONEMKL_DECLARE_HPR2(std::complex<float>)
|
|
402
|
-
ONEMKL_DECLARE_HPR2(std::complex<double>)
|
|
403
|
-
|
|
404
|
-
#undef ONEMKL_DECLARE_HPR2
|
|
405
|
-
|
|
406
|
-
#define ONEMKL_DECLARE_SBMV(T) \
|
|
407
|
-
DLL_EXPORT sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, \
|
|
408
|
-
std::int64_t k, value_or_pointer<T> alpha, \
|
|
409
|
-
const T *a, std::int64_t lda, \
|
|
410
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
411
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
412
|
-
|
|
413
|
-
ONEMKL_DECLARE_SBMV(float)
|
|
414
|
-
ONEMKL_DECLARE_SBMV(double)
|
|
415
|
-
|
|
416
|
-
#undef ONEMKL_DECLARE_SBMV
|
|
417
|
-
|
|
418
|
-
#define ONEMKL_DECLARE_SYMV(T) \
|
|
419
|
-
DLL_EXPORT sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
420
|
-
const T *a, std::int64_t lda, \
|
|
421
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
422
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
423
|
-
|
|
424
|
-
ONEMKL_DECLARE_SYMV(float)
|
|
425
|
-
ONEMKL_DECLARE_SYMV(double)
|
|
426
|
-
ONEMKL_DECLARE_SYMV(std::complex<float>)
|
|
427
|
-
ONEMKL_DECLARE_SYMV(std::complex<double>)
|
|
428
|
-
|
|
429
|
-
#undef ONEMKL_DECLARE_SYMV
|
|
430
|
-
|
|
431
|
-
#define ONEMKL_DECLARE_SYR(T) \
|
|
432
|
-
DLL_EXPORT sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
433
|
-
const T *x, std::int64_t incx, \
|
|
434
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
435
|
-
|
|
436
|
-
ONEMKL_DECLARE_SYR(float)
|
|
437
|
-
ONEMKL_DECLARE_SYR(double)
|
|
438
|
-
ONEMKL_DECLARE_SYR(std::complex<float>)
|
|
439
|
-
ONEMKL_DECLARE_SYR(std::complex<double>)
|
|
440
|
-
|
|
441
|
-
#undef ONEMKL_DECLARE_SYR
|
|
442
|
-
|
|
443
|
-
#define ONEMKL_DECLARE_SYR2(T) \
|
|
444
|
-
DLL_EXPORT sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
445
|
-
const T *x, std::int64_t incx, \
|
|
446
|
-
const T *y, std::int64_t incy, \
|
|
447
|
-
T *a, std::int64_t lda, const std::vector<sycl::event> &dependencies = {});
|
|
448
|
-
|
|
449
|
-
ONEMKL_DECLARE_SYR2(float)
|
|
450
|
-
ONEMKL_DECLARE_SYR2(double)
|
|
451
|
-
ONEMKL_DECLARE_SYR2(std::complex<float>)
|
|
452
|
-
ONEMKL_DECLARE_SYR2(std::complex<double>)
|
|
453
|
-
|
|
454
|
-
#undef ONEMKL_DECLARE_SYR2
|
|
455
|
-
|
|
456
|
-
#define ONEMKL_DECLARE_SPMV(T) \
|
|
457
|
-
DLL_EXPORT sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
458
|
-
const T *a, \
|
|
459
|
-
const T *x, std::int64_t incx, value_or_pointer<T> beta, \
|
|
460
|
-
T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
461
|
-
|
|
462
|
-
ONEMKL_DECLARE_SPMV(float)
|
|
463
|
-
ONEMKL_DECLARE_SPMV(double)
|
|
464
|
-
|
|
465
|
-
#undef ONEMKL_DECLARE_SPMV
|
|
466
|
-
|
|
467
|
-
#define ONEMKL_DECLARE_SPR(T) \
|
|
468
|
-
DLL_EXPORT sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
469
|
-
const T *x, std::int64_t incx, \
|
|
470
|
-
T *a, const std::vector<sycl::event> &dependencies = {});
|
|
471
|
-
|
|
472
|
-
ONEMKL_DECLARE_SPR(float)
|
|
473
|
-
ONEMKL_DECLARE_SPR(double)
|
|
474
|
-
|
|
475
|
-
#undef ONEMKL_DECLARE_SPR
|
|
476
|
-
|
|
477
|
-
#define ONEMKL_DECLARE_SPR2(T) \
|
|
478
|
-
DLL_EXPORT sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer<T> alpha, \
|
|
479
|
-
const T *x, std::int64_t incx, \
|
|
480
|
-
const T *y, std::int64_t incy, \
|
|
481
|
-
T *a, const std::vector<sycl::event> &dependencies = {});
|
|
482
|
-
|
|
483
|
-
ONEMKL_DECLARE_SPR2(float)
|
|
484
|
-
ONEMKL_DECLARE_SPR2(double)
|
|
485
|
-
|
|
486
|
-
#undef ONEMKL_DECLARE_SPR2
|
|
487
|
-
|
|
488
|
-
#define ONEMKL_DECLARE_TBMV(T) \
|
|
489
|
-
DLL_EXPORT sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
490
|
-
std::int64_t n, std::int64_t k, \
|
|
491
|
-
const T *a, std::int64_t lda, \
|
|
492
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
493
|
-
|
|
494
|
-
ONEMKL_DECLARE_TBMV(float)
|
|
495
|
-
ONEMKL_DECLARE_TBMV(double)
|
|
496
|
-
ONEMKL_DECLARE_TBMV(std::complex<float>)
|
|
497
|
-
ONEMKL_DECLARE_TBMV(std::complex<double>)
|
|
498
|
-
|
|
499
|
-
#undef ONEMKL_DECLARE_TBMV
|
|
500
|
-
|
|
501
|
-
#define ONEMKL_DECLARE_TBSV(T) \
|
|
502
|
-
DLL_EXPORT sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
503
|
-
std::int64_t n, std::int64_t k, \
|
|
504
|
-
const T *a, std::int64_t lda, \
|
|
505
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
506
|
-
|
|
507
|
-
ONEMKL_DECLARE_TBSV(float)
|
|
508
|
-
ONEMKL_DECLARE_TBSV(double)
|
|
509
|
-
ONEMKL_DECLARE_TBSV(std::complex<float>)
|
|
510
|
-
ONEMKL_DECLARE_TBSV(std::complex<double>)
|
|
511
|
-
|
|
512
|
-
#undef ONEMKL_DECLARE_TBSV
|
|
513
|
-
|
|
514
|
-
#define ONEMKL_DECLARE_TPMV(T) \
|
|
515
|
-
DLL_EXPORT sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
516
|
-
std::int64_t n, \
|
|
517
|
-
const T *a, \
|
|
518
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
519
|
-
|
|
520
|
-
ONEMKL_DECLARE_TPMV(float)
|
|
521
|
-
ONEMKL_DECLARE_TPMV(double)
|
|
522
|
-
ONEMKL_DECLARE_TPMV(std::complex<float>)
|
|
523
|
-
ONEMKL_DECLARE_TPMV(std::complex<double>)
|
|
524
|
-
|
|
525
|
-
#undef ONEMKL_DECLARE_TPMV
|
|
526
|
-
|
|
527
|
-
#define ONEMKL_DECLARE_TPSV(T) \
|
|
528
|
-
DLL_EXPORT sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
529
|
-
std::int64_t n, \
|
|
530
|
-
const T *a, \
|
|
531
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
532
|
-
|
|
533
|
-
ONEMKL_DECLARE_TPSV(float)
|
|
534
|
-
ONEMKL_DECLARE_TPSV(double)
|
|
535
|
-
ONEMKL_DECLARE_TPSV(std::complex<float>)
|
|
536
|
-
ONEMKL_DECLARE_TPSV(std::complex<double>)
|
|
537
|
-
|
|
538
|
-
#undef ONEMKL_DECLARE_TPSV
|
|
539
|
-
|
|
540
|
-
#define ONEMKL_DECLARE_TRMV(T) \
|
|
541
|
-
DLL_EXPORT sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
542
|
-
std::int64_t n, \
|
|
543
|
-
const T *a, std::int64_t lda, \
|
|
544
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
545
|
-
|
|
546
|
-
ONEMKL_DECLARE_TRMV(float)
|
|
547
|
-
ONEMKL_DECLARE_TRMV(double)
|
|
548
|
-
ONEMKL_DECLARE_TRMV(std::complex<float>)
|
|
549
|
-
ONEMKL_DECLARE_TRMV(std::complex<double>)
|
|
550
|
-
|
|
551
|
-
#undef ONEMKL_DECLARE_TRMV
|
|
552
|
-
|
|
553
|
-
#define ONEMKL_DECLARE_TRSV(T) \
|
|
554
|
-
DLL_EXPORT sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \
|
|
555
|
-
std::int64_t n, \
|
|
556
|
-
const T *a, std::int64_t lda, \
|
|
557
|
-
T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
558
|
-
|
|
559
|
-
ONEMKL_DECLARE_TRSV(float)
|
|
560
|
-
ONEMKL_DECLARE_TRSV(double)
|
|
561
|
-
ONEMKL_DECLARE_TRSV(std::complex<float>)
|
|
562
|
-
ONEMKL_DECLARE_TRSV(std::complex<double>)
|
|
563
|
-
|
|
564
|
-
#undef ONEMKL_DECLARE_TRSV
|
|
565
|
-
|
|
566
|
-
// Level 1
|
|
567
|
-
|
|
568
|
-
#define ONEMKL_DECLARE_DOTC(T) \
|
|
569
|
-
DLL_EXPORT sycl::event dotc(sycl::queue &queue, std::int64_t n, const T *x, \
|
|
570
|
-
std::int64_t incx, const T *y, std::int64_t incy, \
|
|
571
|
-
T *result, const std::vector<sycl::event> &dependencies = {});
|
|
572
|
-
|
|
573
|
-
ONEMKL_DECLARE_DOTC(std::complex<float>)
|
|
574
|
-
ONEMKL_DECLARE_DOTC(std::complex<double>)
|
|
575
|
-
|
|
576
|
-
#undef ONEMKL_DECLARE_DOTC
|
|
577
|
-
|
|
578
|
-
#define ONEMKL_DECLARE_DOTU(T) \
|
|
579
|
-
DLL_EXPORT sycl::event dotu(sycl::queue &queue, std::int64_t n, const T *x, \
|
|
580
|
-
std::int64_t incx, const T *y, std::int64_t incy, \
|
|
581
|
-
T *result, const std::vector<sycl::event> &dependencies = {});
|
|
582
|
-
|
|
583
|
-
ONEMKL_DECLARE_DOTU(std::complex<float>)
|
|
584
|
-
ONEMKL_DECLARE_DOTU(std::complex<double>)
|
|
585
|
-
|
|
586
|
-
#undef ONEMKL_DECLARE_DOTU
|
|
587
|
-
|
|
588
|
-
#define ONEMKL_DECLARE_IAMAX(Tf, Ti) \
|
|
589
|
-
DLL_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, const Tf *x, std::int64_t incx, \
|
|
590
|
-
Ti *result, index_base base, \
|
|
591
|
-
const std::vector<sycl::event> &dependencies = {}); \
|
|
592
|
-
ONEMKL_INLINE_DECLARE sycl::event iamax(sycl::queue &queue, std::int64_t n, const Tf *x, \
|
|
593
|
-
std::int64_t incx, Ti *result, \
|
|
594
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
595
|
-
{ \
|
|
596
|
-
return iamax(queue, n, x, incx, result, index_base::zero, dependencies); \
|
|
597
|
-
}
|
|
598
|
-
|
|
599
|
-
ONEMKL_DECLARE_IAMAX(float, std::int64_t)
|
|
600
|
-
ONEMKL_DECLARE_IAMAX(float, std::int32_t)
|
|
601
|
-
ONEMKL_DECLARE_IAMAX(double, std::int64_t)
|
|
602
|
-
ONEMKL_DECLARE_IAMAX(double, std::int32_t)
|
|
603
|
-
ONEMKL_DECLARE_IAMAX(std::complex<float>, std::int64_t)
|
|
604
|
-
ONEMKL_DECLARE_IAMAX(std::complex<float>, std::int32_t)
|
|
605
|
-
ONEMKL_DECLARE_IAMAX(std::complex<double>, std::int64_t)
|
|
606
|
-
ONEMKL_DECLARE_IAMAX(std::complex<double>, std::int32_t)
|
|
607
|
-
|
|
608
|
-
#undef ONEMKL_DECLARE_IAMAX
|
|
609
|
-
|
|
610
|
-
#define ONEMKL_DECLARE_IAMIN(Tf, Ti) \
|
|
611
|
-
DLL_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, const Tf *x, std::int64_t incx, \
|
|
612
|
-
Ti *result, index_base base, \
|
|
613
|
-
const std::vector<sycl::event> &dependencies = {}); \
|
|
614
|
-
ONEMKL_INLINE_DECLARE sycl::event iamin(sycl::queue &queue, std::int64_t n, const Tf *x, \
|
|
615
|
-
std::int64_t incx, Ti *result, \
|
|
616
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
617
|
-
{ \
|
|
618
|
-
return iamin(queue, n, x, incx, result, index_base::zero, dependencies); \
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
ONEMKL_DECLARE_IAMIN(float, std::int64_t)
|
|
622
|
-
ONEMKL_DECLARE_IAMIN(float, std::int32_t)
|
|
623
|
-
ONEMKL_DECLARE_IAMIN(double, std::int64_t)
|
|
624
|
-
ONEMKL_DECLARE_IAMIN(double, std::int32_t)
|
|
625
|
-
ONEMKL_DECLARE_IAMIN(std::complex<float>, std::int64_t)
|
|
626
|
-
ONEMKL_DECLARE_IAMIN(std::complex<float>, std::int32_t)
|
|
627
|
-
ONEMKL_DECLARE_IAMIN(std::complex<double>, std::int64_t)
|
|
628
|
-
ONEMKL_DECLARE_IAMIN(std::complex<double>, std::int32_t)
|
|
629
|
-
|
|
630
|
-
#undef ONEMKL_DECLARE_IAMIN
|
|
631
|
-
|
|
632
|
-
#define ONEMKL_DECLARE_ASUM(T, Tres) \
|
|
633
|
-
DLL_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, const T *x, std::int64_t incx, Tres *result, const std::vector<sycl::event> &dependencies = {});
|
|
634
|
-
|
|
635
|
-
ONEMKL_DECLARE_ASUM(float, float)
|
|
636
|
-
ONEMKL_DECLARE_ASUM(double, double)
|
|
637
|
-
ONEMKL_DECLARE_ASUM(std::complex<float>, float)
|
|
638
|
-
ONEMKL_DECLARE_ASUM(std::complex<double>, double)
|
|
639
|
-
|
|
640
|
-
#undef ONEMKL_DECLARE_ASUM
|
|
641
|
-
|
|
642
|
-
#define ONEMKL_DECLARE_AXPY(T) \
|
|
643
|
-
DLL_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer<T> alpha, const T *x, std::int64_t incx, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
644
|
-
|
|
645
|
-
ONEMKL_DECLARE_AXPY(sycl::half)
|
|
646
|
-
ONEMKL_DECLARE_AXPY(bfloat16)
|
|
647
|
-
ONEMKL_DECLARE_AXPY(float)
|
|
648
|
-
ONEMKL_DECLARE_AXPY(double)
|
|
649
|
-
ONEMKL_DECLARE_AXPY(std::complex<float>)
|
|
650
|
-
ONEMKL_DECLARE_AXPY(std::complex<double>)
|
|
651
|
-
|
|
652
|
-
#undef ONEMKL_DECLARE_AXPY
|
|
653
|
-
|
|
654
|
-
#define ONEMKL_DECLARE_AXPBY(T) \
|
|
655
|
-
DLL_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer<T> alpha, const T *x, std::int64_t incx, value_or_pointer<T> beta, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
656
|
-
|
|
657
|
-
ONEMKL_DECLARE_AXPBY(float)
|
|
658
|
-
ONEMKL_DECLARE_AXPBY(double)
|
|
659
|
-
ONEMKL_DECLARE_AXPBY(std::complex<float>)
|
|
660
|
-
ONEMKL_DECLARE_AXPBY(std::complex<double>)
|
|
661
|
-
|
|
662
|
-
#undef ONEMKL_DECLARE_AXPBY
|
|
663
|
-
|
|
664
|
-
#define ONEMKL_DECLARE_COPY(T) \
|
|
665
|
-
DLL_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, const T *x, std::int64_t incx, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
666
|
-
|
|
667
|
-
ONEMKL_DECLARE_COPY(float)
|
|
668
|
-
ONEMKL_DECLARE_COPY(double)
|
|
669
|
-
ONEMKL_DECLARE_COPY(std::complex<float>)
|
|
670
|
-
ONEMKL_DECLARE_COPY(std::complex<double>)
|
|
671
|
-
|
|
672
|
-
#undef ONEMKL_DECLARE_COPY
|
|
673
|
-
|
|
674
|
-
#define ONEMKL_DECLARE_DOT(T, Tres) \
|
|
675
|
-
DLL_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const T *x, std::int64_t incx, const T *y, std::int64_t incy, Tres *result, const std::vector<sycl::event> &dependencies = {});
|
|
676
|
-
|
|
677
|
-
ONEMKL_DECLARE_DOT(sycl::half, sycl::half)
|
|
678
|
-
ONEMKL_DECLARE_DOT(bfloat16, bfloat16)
|
|
679
|
-
ONEMKL_DECLARE_DOT(float, float)
|
|
680
|
-
ONEMKL_DECLARE_DOT(double, double)
|
|
681
|
-
ONEMKL_DECLARE_DOT(float, double)
|
|
682
|
-
|
|
683
|
-
#undef ONEMKL_DECLARE_DOT
|
|
684
|
-
|
|
685
|
-
#define ONEMKL_DECLARE_SDSDOT(T) \
|
|
686
|
-
DLL_EXPORT sycl::event sdsdot(sycl::queue &queue, std::int64_t n, T sb, const T *x, std::int64_t incx, const T *y, std::int64_t incy, T *result, const std::vector<sycl::event> &dependencies = {});
|
|
687
|
-
|
|
688
|
-
ONEMKL_DECLARE_SDSDOT(float)
|
|
689
|
-
|
|
690
|
-
#undef ONEMKL_DECLARE_SDSDOT
|
|
691
|
-
|
|
692
|
-
#define ONEMKL_DECLARE_NRM2(T, Tres) \
|
|
693
|
-
DLL_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, const T *x, std::int64_t incx, Tres *result, const std::vector<sycl::event> &dependencies = {});
|
|
694
|
-
|
|
695
|
-
ONEMKL_DECLARE_NRM2(sycl::half, sycl::half)
|
|
696
|
-
ONEMKL_DECLARE_NRM2(bfloat16, bfloat16)
|
|
697
|
-
ONEMKL_DECLARE_NRM2(float, float)
|
|
698
|
-
ONEMKL_DECLARE_NRM2(double, double)
|
|
699
|
-
ONEMKL_DECLARE_NRM2(std::complex<float>, float)
|
|
700
|
-
ONEMKL_DECLARE_NRM2(std::complex<double>, double)
|
|
701
|
-
|
|
702
|
-
#undef ONEMKL_DECLARE_NRM2
|
|
703
|
-
|
|
704
|
-
#define ONEMKL_DECLARE_ROT(T, Tc, Ts) \
|
|
705
|
-
DLL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, T *x, std::int64_t incx, T *y, std::int64_t incy, value_or_pointer<Tc> c, value_or_pointer<Ts> s, const std::vector<sycl::event> &dependencies = {});
|
|
706
|
-
|
|
707
|
-
ONEMKL_DECLARE_ROT(sycl::half, sycl::half, sycl::half)
|
|
708
|
-
ONEMKL_DECLARE_ROT(bfloat16, bfloat16, bfloat16)
|
|
709
|
-
ONEMKL_DECLARE_ROT(float, float, float)
|
|
710
|
-
ONEMKL_DECLARE_ROT(double, double, double)
|
|
711
|
-
ONEMKL_DECLARE_ROT(std::complex<float>, float, float)
|
|
712
|
-
ONEMKL_DECLARE_ROT(std::complex<float>, float, std::complex<float>)
|
|
713
|
-
ONEMKL_DECLARE_ROT(std::complex<double>, double, double)
|
|
714
|
-
ONEMKL_DECLARE_ROT(std::complex<double>, double, std::complex<double>)
|
|
715
|
-
|
|
716
|
-
#undef ONEMKL_DECLARE_ROT
|
|
717
|
-
|
|
718
|
-
#define ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS(T, Tc, Ts) \
|
|
719
|
-
DLL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, T *x, std::int64_t incx, T *y, std::int64_t incy, value_or_pointer<Tc> c, Ts s, const std::vector<sycl::event> &dependencies = {});
|
|
720
|
-
|
|
721
|
-
ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS(std::complex<float>, float, float)
|
|
722
|
-
ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS(std::complex<float>, float, std::complex<float>)
|
|
723
|
-
ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS(std::complex<double>, double, double)
|
|
724
|
-
ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS(std::complex<double>, double, std::complex<double>)
|
|
725
|
-
|
|
726
|
-
#undef ONEMKL_DECLARE_ROT_EXPLICIT_SCALARS
|
|
727
|
-
|
|
728
|
-
#define ONEMKL_DECLARE_ROTG(T, Tc) \
|
|
729
|
-
DLL_EXPORT sycl::event rotg(sycl::queue &queue, T *a, T *b, Tc *c, T *s, const std::vector<sycl::event> &dependencies = {});
|
|
730
|
-
|
|
731
|
-
ONEMKL_DECLARE_ROTG(float, float)
|
|
732
|
-
ONEMKL_DECLARE_ROTG(double, double)
|
|
733
|
-
ONEMKL_DECLARE_ROTG(std::complex<float>, float)
|
|
734
|
-
ONEMKL_DECLARE_ROTG(std::complex<double>, double)
|
|
735
|
-
|
|
736
|
-
#undef ONEMKL_DECLARE_ROTG
|
|
737
|
-
|
|
738
|
-
#define ONEMKL_DECLARE_ROTM(T) \
|
|
739
|
-
DLL_EXPORT sycl::event rotm(sycl::queue &queue, std::int64_t n, T *x, std::int64_t incx, T *y, std::int64_t incy, const T *param, const std::vector<sycl::event> &dependencies = {});
|
|
740
|
-
|
|
741
|
-
ONEMKL_DECLARE_ROTM(float)
|
|
742
|
-
ONEMKL_DECLARE_ROTM(double)
|
|
743
|
-
|
|
744
|
-
#undef ONEMKL_DECLARE_ROTM
|
|
745
|
-
|
|
746
|
-
#define ONEMKL_DECLARE_ROTMG(T) \
|
|
747
|
-
DLL_EXPORT sycl::event rotmg(sycl::queue &queue, T *d1, T *d2, T *x1, value_or_pointer<T> y1, T *param, const std::vector<sycl::event> &dependencies = {});
|
|
748
|
-
|
|
749
|
-
ONEMKL_DECLARE_ROTMG(float)
|
|
750
|
-
ONEMKL_DECLARE_ROTMG(double)
|
|
751
|
-
|
|
752
|
-
#undef ONEMKL_DECLARE_ROTMG
|
|
753
|
-
|
|
754
|
-
#define ONEMKL_DECLARE_SCAL(T, Ts) \
|
|
755
|
-
DLL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, value_or_pointer<Ts> alpha, T *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
756
|
-
|
|
757
|
-
ONEMKL_DECLARE_SCAL(sycl::half, sycl::half)
|
|
758
|
-
ONEMKL_DECLARE_SCAL(bfloat16, bfloat16)
|
|
759
|
-
ONEMKL_DECLARE_SCAL(float, float)
|
|
760
|
-
ONEMKL_DECLARE_SCAL(double, double)
|
|
761
|
-
ONEMKL_DECLARE_SCAL(std::complex<float>, std::complex<float>)
|
|
762
|
-
ONEMKL_DECLARE_SCAL(std::complex<double>, std::complex<double>)
|
|
763
|
-
ONEMKL_DECLARE_SCAL(std::complex<float>, float)
|
|
764
|
-
ONEMKL_DECLARE_SCAL(std::complex<double>, double)
|
|
765
|
-
DLL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex<float> *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
766
|
-
DLL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex<double> *x, std::int64_t incx, const std::vector<sycl::event> &dependencies = {});
|
|
767
|
-
|
|
768
|
-
#undef ONEMKL_DECLARE_SCAL
|
|
769
|
-
|
|
770
|
-
#define ONEMKL_DECLARE_SWAP(T) \
|
|
771
|
-
DLL_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, T *x, std::int64_t incx, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {});
|
|
772
|
-
|
|
773
|
-
ONEMKL_DECLARE_SWAP(float)
|
|
774
|
-
ONEMKL_DECLARE_SWAP(double)
|
|
775
|
-
ONEMKL_DECLARE_SWAP(std::complex<float>)
|
|
776
|
-
ONEMKL_DECLARE_SWAP(std::complex<double>)
|
|
777
|
-
|
|
778
|
-
#undef ONEMKL_DECLARE_SWAP
|
|
779
|
-
|
|
780
|
-
// Batch API
|
|
781
|
-
|
|
782
|
-
#define ONEMKL_DECLARE_GEMM_BATCH(Ta, Tb, Tc, Ts) \
|
|
783
|
-
ONEMKL_DECLARE_GEMM_BATCH_STRIDED(Ta, Tb, Tc, Ts) \
|
|
784
|
-
ONEMKL_DECLARE_GEMM_BATCH_GROUP(Ta, Tb, Tc, Ts, std::int64_t) \
|
|
785
|
-
ONEMKL_DECLARE_GEMM_BATCH_GROUP(Ta, Tb, Tc, Ts, std::int32_t) \
|
|
786
|
-
ONEMKL_DECLARE_GEMM_BATCH_SPAN(Ta, Tb, Tc, Ts)
|
|
787
|
-
|
|
788
|
-
#define ONEMKL_DECLARE_GEMM_BATCH_STRIDED(Ta, Tb, Tc, Ts) \
|
|
789
|
-
DLL_EXPORT sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, \
|
|
790
|
-
std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<Ts> alpha, \
|
|
791
|
-
const Ta *a, std::int64_t lda, std::int64_t stride_a, \
|
|
792
|
-
const Tb *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
793
|
-
value_or_pointer<Ts> beta, Tc *c, std::int64_t ldc, \
|
|
794
|
-
std::int64_t stride_c, std::int64_t batch_size, \
|
|
795
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
796
|
-
ONEMKL_INLINE_DECLARE sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, \
|
|
797
|
-
std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<Ts> alpha, \
|
|
798
|
-
const Ta *a, std::int64_t lda, std::int64_t stride_a, \
|
|
799
|
-
const Tb *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
800
|
-
value_or_pointer<Ts> beta, Tc *c, std::int64_t ldc, \
|
|
801
|
-
std::int64_t stride_c, std::int64_t batch_size, \
|
|
802
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
803
|
-
{ \
|
|
804
|
-
return gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
805
|
-
}
|
|
806
|
-
|
|
807
|
-
#define ONEMKL_DECLARE_GEMM_BATCH_GROUP(Ta, Tb, Tc, Ts, Ti) \
|
|
808
|
-
DLL_EXPORT sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, \
|
|
809
|
-
const Ti *m, const Ti *n, const Ti *k, const Ts *alpha, \
|
|
810
|
-
const Ta **a, const Ti *lda, \
|
|
811
|
-
const Tb **b, const Ti *ldb, \
|
|
812
|
-
const Ts *beta, Tc **c, const Ti *ldc, \
|
|
813
|
-
std::int64_t group_count, const Ti *groupsize, \
|
|
814
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
815
|
-
ONEMKL_INLINE_DECLARE sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, \
|
|
816
|
-
const Ti *m, const Ti *n, const Ti *k, const Ts *alpha, \
|
|
817
|
-
const Ta **a, const Ti *lda, \
|
|
818
|
-
const Tb **b, const Ti *ldb, \
|
|
819
|
-
const Ts *beta, Tc **c, const Ti *ldc, \
|
|
820
|
-
std::int64_t group_count, const Ti *groupsize, \
|
|
821
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
822
|
-
{ \
|
|
823
|
-
return gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
824
|
-
}
|
|
825
|
-
|
|
826
|
-
#define ONEMKL_DECLARE_GEMM_BATCH_SPAN(Ta, Tb, Tc, Ts) \
|
|
827
|
-
DLL_EXPORT sycl::event gemm_batch(sycl::queue &queue, \
|
|
828
|
-
const sycl::span<transpose> &transa, const sycl::span<transpose> &transb, \
|
|
829
|
-
const sycl::span<std::int64_t> &m, const sycl::span<std::int64_t> &n, \
|
|
830
|
-
const sycl::span<std::int64_t> &k, \
|
|
831
|
-
const sycl::span<Ts> &alpha, \
|
|
832
|
-
const sycl::span<const Ta*> &a, const sycl::span<std::int64_t> &lda, \
|
|
833
|
-
const sycl::span<const Tb*> &b, const sycl::span<std::int64_t> &ldb, \
|
|
834
|
-
const sycl::span<Ts> &beta, \
|
|
835
|
-
const sycl::span<Tc*> &c, const sycl::span<std::int64_t> &ldc, \
|
|
836
|
-
size_t group_count, const sycl::span<size_t> &groupsize, \
|
|
837
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
838
|
-
ONEMKL_INLINE_DECLARE sycl::event gemm_batch(sycl::queue &queue, \
|
|
839
|
-
const sycl::span<transpose> &transa, const sycl::span<transpose> &transb, \
|
|
840
|
-
const sycl::span<std::int64_t> &m, const sycl::span<std::int64_t> &n, \
|
|
841
|
-
const sycl::span<std::int64_t> &k, \
|
|
842
|
-
const sycl::span<Ts> &alpha, \
|
|
843
|
-
const sycl::span<const Ta*> &a, const sycl::span<std::int64_t> &lda, \
|
|
844
|
-
const sycl::span<const Tb*> &b, const sycl::span<std::int64_t> &ldb, \
|
|
845
|
-
const sycl::span<Ts> &beta, \
|
|
846
|
-
const sycl::span<Tc*> &c, const sycl::span<std::int64_t> &ldc, \
|
|
847
|
-
size_t group_count, const sycl::span<size_t> &groupsize, \
|
|
848
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
849
|
-
{ \
|
|
850
|
-
return gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
ONEMKL_DECLARE_GEMM_BATCH(float, float, float, float)
|
|
854
|
-
ONEMKL_DECLARE_GEMM_BATCH(double, double, double, double)
|
|
855
|
-
ONEMKL_DECLARE_GEMM_BATCH(std::complex<float>, std::complex<float>, std::complex<float>, std::complex<float>)
|
|
856
|
-
ONEMKL_DECLARE_GEMM_BATCH(std::complex<double>, std::complex<double>, std::complex<double>, std::complex<double>)
|
|
857
|
-
ONEMKL_DECLARE_GEMM_BATCH(sycl::half, sycl::half, sycl::half, sycl::half)
|
|
858
|
-
ONEMKL_DECLARE_GEMM_BATCH(sycl::half, sycl::half, float, float)
|
|
859
|
-
ONEMKL_DECLARE_GEMM_BATCH(bfloat16, bfloat16, bfloat16, float)
|
|
860
|
-
ONEMKL_DECLARE_GEMM_BATCH(bfloat16, bfloat16, float, float)
|
|
861
|
-
ONEMKL_DECLARE_GEMM_BATCH(std::int8_t, std::int8_t, std::int32_t, float)
|
|
862
|
-
ONEMKL_DECLARE_GEMM_BATCH(std::int8_t, std::int8_t, float, float)
|
|
863
|
-
|
|
864
|
-
#undef ONEMKL_DECLARE_GEMM_BATCH
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
#define ONEMKL_DECLARE_SYRK_BATCH(T) \
|
|
868
|
-
ONEMKL_DECLARE_SYRK_BATCH_STRIDED(T) \
|
|
869
|
-
ONEMKL_DECLARE_SYRK_BATCH_GROUP(T, std::int64_t) \
|
|
870
|
-
ONEMKL_DECLARE_SYRK_BATCH_GROUP(T, std::int32_t)
|
|
871
|
-
|
|
872
|
-
#define ONEMKL_DECLARE_SYRK_BATCH_STRIDED(T) \
|
|
873
|
-
DLL_EXPORT sycl::event syrk_batch(sycl::queue &queue, \
|
|
874
|
-
uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
875
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, std::int64_t stride_a, \
|
|
876
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, std::int64_t stride_c, \
|
|
877
|
-
std::int64_t batch_size, \
|
|
878
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
879
|
-
ONEMKL_INLINE_DECLARE sycl::event syrk_batch(sycl::queue &queue, \
|
|
880
|
-
uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, \
|
|
881
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, std::int64_t stride_a, \
|
|
882
|
-
value_or_pointer<T> beta, T *c, std::int64_t ldc, std::int64_t stride_c, \
|
|
883
|
-
std::int64_t batch_size, const std::vector<sycl::event> &dependencies = {}) \
|
|
884
|
-
{ \
|
|
885
|
-
return syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
886
|
-
}
|
|
887
|
-
|
|
888
|
-
#define ONEMKL_DECLARE_SYRK_BATCH_GROUP(T, Ti) \
|
|
889
|
-
DLL_EXPORT sycl::event syrk_batch(sycl::queue &queue, \
|
|
890
|
-
const uplo *upper_lower, const transpose *trans, const Ti *n, const Ti *k, \
|
|
891
|
-
const T *alpha, const T **a, const Ti *lda, const T *beta, \
|
|
892
|
-
T **c, const Ti *ldc, \
|
|
893
|
-
std::int64_t group_count, const Ti *groupsize, \
|
|
894
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
895
|
-
ONEMKL_INLINE_DECLARE sycl::event syrk_batch(sycl::queue &queue, \
|
|
896
|
-
const uplo *upper_lower, const transpose *trans, const Ti *n, const Ti *k, \
|
|
897
|
-
const T *alpha, const T **a, const Ti *lda, const T *beta, \
|
|
898
|
-
T **c, const Ti *ldc, \
|
|
899
|
-
std::int64_t group_count, const Ti *groupsize, \
|
|
900
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
901
|
-
{ \
|
|
902
|
-
return syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, groupsize, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
903
|
-
}
|
|
904
|
-
|
|
905
|
-
ONEMKL_DECLARE_SYRK_BATCH(float)
|
|
906
|
-
ONEMKL_DECLARE_SYRK_BATCH(double)
|
|
907
|
-
ONEMKL_DECLARE_SYRK_BATCH(std::complex<float>)
|
|
908
|
-
ONEMKL_DECLARE_SYRK_BATCH(std::complex<double>)
|
|
909
|
-
|
|
910
|
-
#undef ONEMKL_DECLARE_SYRK_BATCH
|
|
911
|
-
|
|
912
|
-
#define ONEMKL_DECLARE_TRSM_BATCH(T) \
|
|
913
|
-
ONEMKL_DECLARE_TRSM_BATCH_STRIDED(T) \
|
|
914
|
-
ONEMKL_DECLARE_TRSM_BATCH_GROUP(T, std::int64_t) \
|
|
915
|
-
ONEMKL_DECLARE_TRSM_BATCH_GROUP(T, std::int32_t)
|
|
916
|
-
|
|
917
|
-
#define ONEMKL_DECLARE_TRSM_BATCH_STRIDED(T) \
|
|
918
|
-
DLL_EXPORT sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, \
|
|
919
|
-
transpose trans, diag unit_diag, \
|
|
920
|
-
std::int64_t m, std::int64_t n, \
|
|
921
|
-
value_or_pointer<T> alpha, const T *a, \
|
|
922
|
-
std::int64_t lda, std::int64_t stride_a, \
|
|
923
|
-
T *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
924
|
-
std::int64_t batch_size, \
|
|
925
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
926
|
-
ONEMKL_INLINE_DECLARE sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, \
|
|
927
|
-
transpose trans, diag unit_diag, \
|
|
928
|
-
std::int64_t m, std::int64_t n, \
|
|
929
|
-
value_or_pointer<T> alpha, const T *a, \
|
|
930
|
-
std::int64_t lda, std::int64_t stride_a, \
|
|
931
|
-
T *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
932
|
-
std::int64_t batch_size, \
|
|
933
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
934
|
-
{ \
|
|
935
|
-
return trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
936
|
-
}
|
|
937
|
-
|
|
938
|
-
#define ONEMKL_DECLARE_TRSM_BATCH_GROUP(T, Ti) \
|
|
939
|
-
DLL_EXPORT sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, \
|
|
940
|
-
const transpose *trans, const diag *unit_diag, \
|
|
941
|
-
const Ti *m, const Ti *n, \
|
|
942
|
-
const T *alpha, const T **a, const Ti *lda, \
|
|
943
|
-
T **b, const Ti *ldb, \
|
|
944
|
-
std::int64_t group_count, const Ti *group_size, \
|
|
945
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
946
|
-
ONEMKL_INLINE_DECLARE sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, \
|
|
947
|
-
const transpose *trans, const diag *unit_diag, \
|
|
948
|
-
const Ti *m, const Ti *n, \
|
|
949
|
-
const T *alpha, const T **a, const Ti *lda, \
|
|
950
|
-
T **b, const Ti *ldb, \
|
|
951
|
-
std::int64_t group_count, const Ti *group_size, \
|
|
952
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
953
|
-
{ \
|
|
954
|
-
return trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
955
|
-
}
|
|
956
|
-
|
|
957
|
-
ONEMKL_DECLARE_TRSM_BATCH(float)
|
|
958
|
-
ONEMKL_DECLARE_TRSM_BATCH(double)
|
|
959
|
-
ONEMKL_DECLARE_TRSM_BATCH(std::complex<float>)
|
|
960
|
-
ONEMKL_DECLARE_TRSM_BATCH(std::complex<double>)
|
|
961
|
-
|
|
962
|
-
#undef ONEMKL_DECLARE_TRSM_BATCH
|
|
963
|
-
|
|
964
|
-
#define ONEMKL_DECLARE_DGMM_BATCH(T) \
|
|
965
|
-
ONEMKL_DECLARE_DGMM_BATCH_STRIDED(T) \
|
|
966
|
-
ONEMKL_DECLARE_DGMM_BATCH_GROUP(T, std::int64_t) \
|
|
967
|
-
ONEMKL_DECLARE_DGMM_BATCH_GROUP(T, std::int32_t)
|
|
968
|
-
|
|
969
|
-
#define ONEMKL_DECLARE_DGMM_BATCH_STRIDED(T) \
|
|
970
|
-
DLL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, side left_right, \
|
|
971
|
-
std::int64_t m, std::int64_t n, \
|
|
972
|
-
const T *a, std::int64_t lda, std::int64_t stridea, \
|
|
973
|
-
const T *x, std::int64_t incx, std::int64_t stridex, \
|
|
974
|
-
T *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, \
|
|
975
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
976
|
-
|
|
977
|
-
#define ONEMKL_DECLARE_DGMM_BATCH_GROUP(Tf, Ti) \
|
|
978
|
-
DLL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, \
|
|
979
|
-
const Ti *m, const Ti *n, \
|
|
980
|
-
const Tf **a, const Ti *lda, \
|
|
981
|
-
const Tf **x, const Ti *incx, \
|
|
982
|
-
Tf **c, const Ti *ldc, \
|
|
983
|
-
std::int64_t group_count, const Ti *group_size, \
|
|
984
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
985
|
-
|
|
986
|
-
ONEMKL_DECLARE_DGMM_BATCH(float)
|
|
987
|
-
ONEMKL_DECLARE_DGMM_BATCH(double)
|
|
988
|
-
ONEMKL_DECLARE_DGMM_BATCH(std::complex<float>)
|
|
989
|
-
ONEMKL_DECLARE_DGMM_BATCH(std::complex<double>)
|
|
990
|
-
|
|
991
|
-
#undef ONEMKL_DECLARE_DGMM_BATCH_STRIDED
|
|
992
|
-
#undef ONEMKL_DECLARE_DGMM_BATCH_GROUP
|
|
993
|
-
#undef ONEMKL_DECLARE_DGMM_BATCH
|
|
994
|
-
|
|
995
|
-
#define ONEMKL_DECLARE_GEMV_BATCH(T) \
|
|
996
|
-
ONEMKL_DECLARE_GEMV_BATCH_STRIDED(T) \
|
|
997
|
-
ONEMKL_DECLARE_GEMV_BATCH_GROUP(T, std::int64_t) \
|
|
998
|
-
ONEMKL_DECLARE_GEMV_BATCH_GROUP(T, std::int32_t)
|
|
999
|
-
|
|
1000
|
-
#define ONEMKL_DECLARE_GEMV_BATCH_STRIDED(T) \
|
|
1001
|
-
DLL_EXPORT sycl::event gemv_batch(sycl::queue &queue, transpose trans, \
|
|
1002
|
-
std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, \
|
|
1003
|
-
const T *a, std::int64_t lda, std::int64_t stridea, \
|
|
1004
|
-
const T *x, std::int64_t incx, std::int64_t stridex, value_or_pointer<T> beta, \
|
|
1005
|
-
T *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, \
|
|
1006
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1007
|
-
|
|
1008
|
-
#define ONEMKL_DECLARE_GEMV_BATCH_GROUP(Tf, Ti) \
|
|
1009
|
-
DLL_EXPORT sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, \
|
|
1010
|
-
const Ti *m, const Ti *n, const Tf *alpha, \
|
|
1011
|
-
const Tf **a, const Ti *lda, \
|
|
1012
|
-
const Tf **x, const Ti *incx, const Tf *beta, \
|
|
1013
|
-
Tf **y, const Ti *incy, \
|
|
1014
|
-
std::int64_t group_count, const Ti *group_size, \
|
|
1015
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1016
|
-
|
|
1017
|
-
ONEMKL_DECLARE_GEMV_BATCH(float)
|
|
1018
|
-
ONEMKL_DECLARE_GEMV_BATCH(double)
|
|
1019
|
-
ONEMKL_DECLARE_GEMV_BATCH(std::complex<float>)
|
|
1020
|
-
ONEMKL_DECLARE_GEMV_BATCH(std::complex<double>)
|
|
1021
|
-
|
|
1022
|
-
#undef ONEMKL_DECLARE_GEMV_BATCH_STRIDED
|
|
1023
|
-
#undef ONEMKL_DECLARE_GEMV_BATCH_GROUP
|
|
1024
|
-
#undef ONEMKL_DECLARE_GEMV_BATCH
|
|
1025
|
-
|
|
1026
|
-
#define ONEMKL_DECLARE_AXPY_BATCH(T) \
|
|
1027
|
-
ONEMKL_DECLARE_AXPY_BATCH_STRIDED(T) \
|
|
1028
|
-
ONEMKL_DECLARE_AXPY_BATCH_GROUP(T, std::int64_t) \
|
|
1029
|
-
ONEMKL_DECLARE_AXPY_BATCH_GROUP(T, std::int32_t) \
|
|
1030
|
-
|
|
1031
|
-
#define ONEMKL_DECLARE_AXPY_BATCH_STRIDED(T) \
|
|
1032
|
-
DLL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer<T> alpha, \
|
|
1033
|
-
const T *x, std::int64_t incx, std::int64_t stridex, \
|
|
1034
|
-
T *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, \
|
|
1035
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1036
|
-
|
|
1037
|
-
#define ONEMKL_DECLARE_AXPY_BATCH_GROUP(Tf, Ti) \
|
|
1038
|
-
DLL_EXPORT sycl::event axpy_batch(sycl::queue &queue, const Ti *n, const Tf *alpha, const Tf **x, \
|
|
1039
|
-
const Ti *incx, Tf **y, const Ti *incy, std::int64_t group_count, \
|
|
1040
|
-
const Ti *group_size, \
|
|
1041
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1042
|
-
|
|
1043
|
-
ONEMKL_DECLARE_AXPY_BATCH(float)
|
|
1044
|
-
ONEMKL_DECLARE_AXPY_BATCH(double)
|
|
1045
|
-
ONEMKL_DECLARE_AXPY_BATCH(std::complex<float>)
|
|
1046
|
-
ONEMKL_DECLARE_AXPY_BATCH(std::complex<double>)
|
|
1047
|
-
|
|
1048
|
-
#undef ONEMKL_DECLARE_AXPY_BATCH_STRIDED
|
|
1049
|
-
#undef ONEMKL_DECLARE_AXPY_BATCH_GROUP
|
|
1050
|
-
#undef ONEMKL_DECLARE_AXPY_BATCH
|
|
1051
|
-
|
|
1052
|
-
#define ONEMKL_DECLARE_COPY_BATCH(T) \
|
|
1053
|
-
ONEMKL_DECLARE_COPY_BATCH_STRIDED(T) \
|
|
1054
|
-
ONEMKL_DECLARE_COPY_BATCH_GROUP(T, std::int64_t) \
|
|
1055
|
-
ONEMKL_DECLARE_COPY_BATCH_GROUP(T, std::int32_t)
|
|
1056
|
-
|
|
1057
|
-
#define ONEMKL_DECLARE_COPY_BATCH_STRIDED(T) \
|
|
1058
|
-
DLL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, \
|
|
1059
|
-
const T *x, std::int64_t incx, std::int64_t stridex, \
|
|
1060
|
-
T *y, std::int64_t incy, std::int64_t stridey, \
|
|
1061
|
-
std::int64_t batch_size, \
|
|
1062
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1063
|
-
|
|
1064
|
-
#define ONEMKL_DECLARE_COPY_BATCH_GROUP(Tf, Ti) \
|
|
1065
|
-
DLL_EXPORT sycl::event copy_batch(sycl::queue &queue, const Ti *n, \
|
|
1066
|
-
const Tf **x, const Ti *incx, Tf **y, const Ti *incy, \
|
|
1067
|
-
std::int64_t group_count, const Ti *group_size, \
|
|
1068
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1069
|
-
|
|
1070
|
-
ONEMKL_DECLARE_COPY_BATCH(float)
|
|
1071
|
-
ONEMKL_DECLARE_COPY_BATCH(double)
|
|
1072
|
-
ONEMKL_DECLARE_COPY_BATCH(std::complex<float>)
|
|
1073
|
-
ONEMKL_DECLARE_COPY_BATCH(std::complex<double>)
|
|
1074
|
-
|
|
1075
|
-
#undef ONEMKL_DECLARE_COPY_BATCH_STRIDED
|
|
1076
|
-
#undef ONEMKL_DECLARE_COPY_BATCH_GROUP
|
|
1077
|
-
#undef ONEMKL_DECLARE_COPY_BATCH
|
|
1078
|
-
|
|
1079
|
-
// BLAS-like extensions
|
|
1080
|
-
|
|
1081
|
-
#define ONEMKL_DECLARE_GEMMT(T) \
|
|
1082
|
-
DLL_EXPORT sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, \
|
|
1083
|
-
std::int64_t k, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
1084
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, \
|
|
1085
|
-
std::int64_t ldc, compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
1086
|
-
ONEMKL_INLINE_DECLARE sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, \
|
|
1087
|
-
std::int64_t k, value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
1088
|
-
const T *b, std::int64_t ldb, value_or_pointer<T> beta, T *c, \
|
|
1089
|
-
std::int64_t ldc, const std::vector<sycl::event> &dependencies = {}) \
|
|
1090
|
-
{ \
|
|
1091
|
-
return gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
1092
|
-
}
|
|
1093
|
-
|
|
1094
|
-
ONEMKL_DECLARE_GEMMT(float)
|
|
1095
|
-
ONEMKL_DECLARE_GEMMT(double)
|
|
1096
|
-
ONEMKL_DECLARE_GEMMT(std::complex<float>)
|
|
1097
|
-
ONEMKL_DECLARE_GEMMT(std::complex<double>)
|
|
1098
|
-
|
|
1099
|
-
#undef ONEMKL_DECLARE_GEMMT
|
|
1100
|
-
|
|
1101
|
-
#define ONEMKL_DECLARE_GEMM_BIAS(Ta, Tb) \
|
|
1102
|
-
DLL_EXPORT sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, \
|
|
1103
|
-
offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, \
|
|
1104
|
-
const Ta *a, std::int64_t lda, Ta ao, \
|
|
1105
|
-
const Tb *b, std::int64_t ldb, Tb bo, \
|
|
1106
|
-
value_or_pointer<float> beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, \
|
|
1107
|
-
compute_mode mode, const std::vector<sycl::event> &dependencies = {}); \
|
|
1108
|
-
ONEMKL_INLINE_DECLARE sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, \
|
|
1109
|
-
offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, \
|
|
1110
|
-
const Ta *a, std::int64_t lda, Ta ao, \
|
|
1111
|
-
const Tb *b, std::int64_t ldb, Tb bo, \
|
|
1112
|
-
value_or_pointer<float> beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, \
|
|
1113
|
-
const std::vector<sycl::event> &dependencies = {}) \
|
|
1114
|
-
{ \
|
|
1115
|
-
return gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, MKL_BLAS_COMPUTE_MODE, dependencies); \
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
ONEMKL_DECLARE_GEMM_BIAS(std::int8_t, std::int8_t)
|
|
1119
|
-
ONEMKL_DECLARE_GEMM_BIAS(std::int8_t, std::uint8_t)
|
|
1120
|
-
ONEMKL_DECLARE_GEMM_BIAS(std::uint8_t, std::int8_t)
|
|
1121
|
-
ONEMKL_DECLARE_GEMM_BIAS(std::uint8_t, std::uint8_t)
|
|
1122
|
-
|
|
1123
|
-
#undef ONEMKL_DECLARE_GEMM_BIAS
|
|
1124
|
-
|
|
1125
|
-
#define ONEMKL_DECLARE_IMATCOPY(T) \
|
|
1126
|
-
DLL_EXPORT sycl::event imatcopy(sycl::queue &queue, transpose trans, \
|
|
1127
|
-
std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, T *ab, \
|
|
1128
|
-
std::int64_t lda, std::int64_t ldb, \
|
|
1129
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1130
|
-
|
|
1131
|
-
ONEMKL_DECLARE_IMATCOPY(float)
|
|
1132
|
-
ONEMKL_DECLARE_IMATCOPY(double)
|
|
1133
|
-
ONEMKL_DECLARE_IMATCOPY(std::complex<float>)
|
|
1134
|
-
ONEMKL_DECLARE_IMATCOPY(std::complex<double>)
|
|
1135
|
-
|
|
1136
|
-
#undef ONEMKL_DECLARE_IMATCOPY
|
|
1137
|
-
|
|
1138
|
-
#define ONEMKL_DECLARE_OMATCOPY(T) \
|
|
1139
|
-
DLL_EXPORT sycl::event omatcopy(sycl::queue &queue, transpose trans, \
|
|
1140
|
-
std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, const T *a, \
|
|
1141
|
-
std::int64_t lda, T *b, std::int64_t ldb, \
|
|
1142
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1143
|
-
|
|
1144
|
-
ONEMKL_DECLARE_OMATCOPY(float)
|
|
1145
|
-
ONEMKL_DECLARE_OMATCOPY(double)
|
|
1146
|
-
ONEMKL_DECLARE_OMATCOPY(std::complex<float>)
|
|
1147
|
-
ONEMKL_DECLARE_OMATCOPY(std::complex<double>)
|
|
1148
|
-
|
|
1149
|
-
#undef ONEMKL_DECLARE_OMATCOPY
|
|
1150
|
-
|
|
1151
|
-
#define ONEMKL_DECLARE_OMATADD(T) \
|
|
1152
|
-
DLL_EXPORT sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, \
|
|
1153
|
-
std::int64_t m, std::int64_t n, \
|
|
1154
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, \
|
|
1155
|
-
value_or_pointer<T> beta, const T *b, std::int64_t ldb, \
|
|
1156
|
-
T *c, std::int64_t ldc, \
|
|
1157
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1158
|
-
|
|
1159
|
-
ONEMKL_DECLARE_OMATADD(float)
|
|
1160
|
-
ONEMKL_DECLARE_OMATADD(double)
|
|
1161
|
-
ONEMKL_DECLARE_OMATADD(std::complex<float>)
|
|
1162
|
-
ONEMKL_DECLARE_OMATADD(std::complex<double>)
|
|
1163
|
-
|
|
1164
|
-
#undef ONEMKL_DECLARE_OMATADD
|
|
1165
|
-
|
|
1166
|
-
#define ONEMKL_DECLARE_IMATCOPY_BATCH(T) \
|
|
1167
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH_STRIDED(T) \
|
|
1168
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH_GROUP(T, std::int64_t) \
|
|
1169
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH_GROUP(T, std::int32_t)
|
|
1170
|
-
|
|
1171
|
-
#define ONEMKL_DECLARE_IMATCOPY_BATCH_STRIDED(T) \
|
|
1172
|
-
DLL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, \
|
|
1173
|
-
std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, T *ab, std::int64_t lda, \
|
|
1174
|
-
std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, \
|
|
1175
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1176
|
-
|
|
1177
|
-
#define ONEMKL_DECLARE_IMATCOPY_BATCH_GROUP(T, Ti) \
|
|
1178
|
-
DLL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, \
|
|
1179
|
-
const Ti *m, const Ti *n, const T *alpha, T **ab, \
|
|
1180
|
-
const Ti *lda, const Ti *ldb, std::int64_t group_count, \
|
|
1181
|
-
const Ti *groupsize, \
|
|
1182
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1183
|
-
|
|
1184
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH(float)
|
|
1185
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH(double)
|
|
1186
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH(std::complex<float>)
|
|
1187
|
-
ONEMKL_DECLARE_IMATCOPY_BATCH(std::complex<double>)
|
|
1188
|
-
|
|
1189
|
-
#undef ONEMKL_DECLARE_IMATCOPY_BATCH_GROUP
|
|
1190
|
-
#undef ONEMKL_DECLARE_IMATCOPY_BATCH_STRIDED
|
|
1191
|
-
#undef ONEMKL_DECLARE_IMATCOPY_BATCH
|
|
1192
|
-
|
|
1193
|
-
#define ONEMKL_DECLARE_OMATCOPY_BATCH(T) \
|
|
1194
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH_STRIDED(T) \
|
|
1195
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH_GROUP(T, std::int64_t) \
|
|
1196
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH_GROUP(T, std::int32_t)
|
|
1197
|
-
|
|
1198
|
-
#define ONEMKL_DECLARE_OMATCOPY_BATCH_STRIDED(T) \
|
|
1199
|
-
DLL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, \
|
|
1200
|
-
std::int64_t m, std::int64_t n, value_or_pointer<T> alpha, \
|
|
1201
|
-
const T *a, std::int64_t lda, std::int64_t stride_a, \
|
|
1202
|
-
T *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
1203
|
-
std::int64_t batch_size, \
|
|
1204
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1205
|
-
|
|
1206
|
-
#define ONEMKL_DECLARE_OMATCOPY_BATCH_GROUP(T, Ti) \
|
|
1207
|
-
DLL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, \
|
|
1208
|
-
const Ti *m, const Ti *n, const T *alpha, const T **a, \
|
|
1209
|
-
const Ti *lda, T **b, const Ti *ldb, std::int64_t group_count, \
|
|
1210
|
-
const Ti *groupsize, \
|
|
1211
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1212
|
-
|
|
1213
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH(float)
|
|
1214
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH(double)
|
|
1215
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH(std::complex<float>)
|
|
1216
|
-
ONEMKL_DECLARE_OMATCOPY_BATCH(std::complex<double>)
|
|
1217
|
-
|
|
1218
|
-
#undef ONEMKL_DECLARE_OMATCOPY_BATCH_GROUP
|
|
1219
|
-
#undef ONEMKL_DECLARE_OMATCOPY_BATCH_STRIDED
|
|
1220
|
-
#undef ONEMKL_DECLARE_OMATCOPY_BATCH
|
|
1221
|
-
|
|
1222
|
-
#define ONEMKL_DECLARE_OMATADD_BATCH(T) \
|
|
1223
|
-
ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(T)
|
|
1224
|
-
|
|
1225
|
-
#define ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(T) \
|
|
1226
|
-
DLL_EXPORT sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, \
|
|
1227
|
-
std::int64_t m, std::int64_t n, \
|
|
1228
|
-
value_or_pointer<T> alpha, const T *a, std::int64_t lda, std::int64_t stride_a, \
|
|
1229
|
-
value_or_pointer<T> beta, const T *b, std::int64_t ldb, std::int64_t stride_b, \
|
|
1230
|
-
T *c, std::int64_t ldc, std::int64_t stride_c, \
|
|
1231
|
-
std::int64_t batch_size, \
|
|
1232
|
-
const std::vector<sycl::event> &dependencies = {});
|
|
1233
|
-
|
|
1234
|
-
ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(float)
|
|
1235
|
-
ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(double)
|
|
1236
|
-
ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(std::complex<float>)
|
|
1237
|
-
ONEMKL_DECLARE_OMATADD_BATCH_STRIDED(std::complex<double>)
|
|
1238
|
-
|
|
1239
|
-
#undef ONEMKL_DECLARE_OMATADD_BATCH
|
|
1240
|
-
|