mkl-devel-dpcpp 2025.0.1__py2.py3-none-win_amd64.whl → 2025.2.0__py2.py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkl-devel-dpcpp might be problematic. Click here for more details.
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/METADATA +4 -3
- mkl_devel_dpcpp-2025.2.0.dist-info/RECORD +15 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -26
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -880
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/types.hpp +0 -60
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -1240
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas.hpp +0 -33
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dft.hpp +0 -253
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dfti.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/exceptions.hpp +0 -110
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -67
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -68
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -177
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/export.hpp +0 -25
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -55
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -75
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/lapack.hpp +0 -1095
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -106
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack.hpp +0 -23
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -48
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -89
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +0 -464
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -71
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -43
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -54
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +0 -285
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -270
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -105
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -117
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +0 -223
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -118
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +0 -266
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -125
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -385
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -3668
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -141
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -552
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -355
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -58
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -51
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -289
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +0 -183
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +0 -637
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engines.hpp +0 -187
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -59
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/types.hpp +0 -74
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device.hpp +0 -29
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/distributions.hpp +0 -1913
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -788
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -163
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +0 -111
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +0 -446
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +0 -193
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas.hpp +0 -32
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/stats.hpp +0 -356
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/types.hpp +0 -321
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -3529
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -280
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -1059
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -861
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -4608
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -8963
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -460
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/span.hpp +0 -3813
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -3581
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm.hpp +0 -30
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl.hpp +0 -34
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.dist-info/RECORD +0 -90
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/LICENSE.txt +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/WHEEL +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
/*******************************************************************************
|
|
2
|
-
* Copyright 2023 Intel Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing,
|
|
11
|
-
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions
|
|
14
|
-
* and limitations under the License.
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
-
*******************************************************************************/
|
|
19
|
-
|
|
20
|
-
#ifndef _MKL_RNG_DEVICE_MRG32K3A_HELPERS_IMPL_HPP_
|
|
21
|
-
#define _MKL_RNG_DEVICE_MRG32K3A_HELPERS_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include "oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp"
|
|
24
|
-
|
|
25
|
-
namespace oneapi::mkl::rng::device::detail {
|
|
26
|
-
namespace mrg32k3a_impl {
|
|
27
|
-
|
|
28
|
-
template <std::int32_t VecSize>
|
|
29
|
-
static inline void init(
|
|
30
|
-
engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state, std::size_t id,
|
|
31
|
-
const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
|
|
32
|
-
std::size_t num_elements_acc =
|
|
33
|
-
sizeof(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>) / sizeof(std::uint32_t);
|
|
34
|
-
state.s[0] = accessor[id * num_elements_acc];
|
|
35
|
-
state.s[1] = accessor[id * num_elements_acc + 1];
|
|
36
|
-
state.s[2] = accessor[id * num_elements_acc + 2];
|
|
37
|
-
state.s[3] = accessor[id * num_elements_acc + 3];
|
|
38
|
-
state.s[4] = accessor[id * num_elements_acc + 4];
|
|
39
|
-
state.s[5] = accessor[id * num_elements_acc + 5];
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
template <std::int32_t VecSize>
|
|
43
|
-
static inline void store(
|
|
44
|
-
engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state, std::size_t id,
|
|
45
|
-
const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
|
|
46
|
-
std::size_t num_elements_acc =
|
|
47
|
-
sizeof(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>) / sizeof(std::uint32_t);
|
|
48
|
-
accessor[id * num_elements_acc] = state.s[0];
|
|
49
|
-
accessor[id * num_elements_acc + 1] = state.s[1];
|
|
50
|
-
accessor[id * num_elements_acc + 2] = state.s[2];
|
|
51
|
-
accessor[id * num_elements_acc + 3] = state.s[3];
|
|
52
|
-
accessor[id * num_elements_acc + 4] = state.s[4];
|
|
53
|
-
accessor[id * num_elements_acc + 5] = state.s[5];
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
} // namespace mrg32k3a_impl
|
|
57
|
-
|
|
58
|
-
template <std::int32_t VecSize>
|
|
59
|
-
class engine_accessor_base<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
|
|
60
|
-
public:
|
|
61
|
-
engine_accessor_base(sycl::buffer<std::uint32_t, 1>& state_buf, sycl::handler& cgh)
|
|
62
|
-
: states_accessor_(state_buf, cgh) {}
|
|
63
|
-
|
|
64
|
-
oneapi::mkl::rng::device::mrg32k3a<VecSize> load(std::size_t id) const {
|
|
65
|
-
oneapi::mkl::rng::device::mrg32k3a<VecSize> engine;
|
|
66
|
-
mrg32k3a_impl::init(engine.state_, id, states_accessor_);
|
|
67
|
-
return engine;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
void store(oneapi::mkl::rng::device::mrg32k3a<VecSize>& engine, std::size_t id) const {
|
|
71
|
-
mrg32k3a_impl::store(engine.state_, id, states_accessor_);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
protected:
|
|
75
|
-
sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write> states_accessor_;
|
|
76
|
-
};
|
|
77
|
-
|
|
78
|
-
template <std::int32_t VecSize>
|
|
79
|
-
class engine_descriptor_base<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
|
|
80
|
-
public:
|
|
81
|
-
using engine_type = oneapi::mkl::rng::device::mrg32k3a<VecSize>;
|
|
82
|
-
|
|
83
|
-
using accessor_type =
|
|
84
|
-
oneapi::mkl::rng::device::engine_accessor<oneapi::mkl::rng::device::mrg32k3a<VecSize>>;
|
|
85
|
-
|
|
86
|
-
engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, std::uint32_t seed,
|
|
87
|
-
std::uint64_t offset)
|
|
88
|
-
: states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
|
|
89
|
-
sizeof(std::uint32_t)) {
|
|
90
|
-
queue.submit([&](sycl::handler& cgh) {
|
|
91
|
-
accessor_type states_accessor(states_buffer_, cgh);
|
|
92
|
-
|
|
93
|
-
cgh.parallel_for<class init_kernel<engine_type>>(range, [=](sycl::item<1> item) {
|
|
94
|
-
std::size_t id = item.get_id(0);
|
|
95
|
-
oneapi::mkl::rng::device::mrg32k3a<VecSize> engine(seed, offset* id);
|
|
96
|
-
states_accessor.store(engine, id);
|
|
97
|
-
});
|
|
98
|
-
});
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
template <typename InitEngineFunc>
|
|
102
|
-
engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, InitEngineFunc func)
|
|
103
|
-
: states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
|
|
104
|
-
sizeof(std::uint32_t)) {
|
|
105
|
-
queue.submit([&](sycl::handler& cgh) {
|
|
106
|
-
accessor_type states_accessor(states_buffer_, cgh);
|
|
107
|
-
|
|
108
|
-
cgh.parallel_for<class init_kernel_ex<engine_type>>(range, [=](sycl::item<1> item) {
|
|
109
|
-
std::size_t id = item.get_id(0);
|
|
110
|
-
states_accessor.store(func(item), id);
|
|
111
|
-
});
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
accessor_type get_access(sycl::handler& cgh) {
|
|
116
|
-
return accessor_type{ states_buffer_, cgh };
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
protected:
|
|
120
|
-
sycl::buffer<std::uint32_t, 1> states_buffer_;
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
} // namespace oneapi::mkl::rng::device::detail
|
|
124
|
-
|
|
125
|
-
#endif // _MKL_RNG_DEVICE_MRG32K3A_HELPERS_IMPL_HPP_
|
mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp
DELETED
|
@@ -1,385 +0,0 @@
|
|
|
1
|
-
/*******************************************************************************
|
|
2
|
-
* Copyright 2020 Intel Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing,
|
|
11
|
-
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions
|
|
14
|
-
* and limitations under the License.
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
-
*******************************************************************************/
|
|
19
|
-
|
|
20
|
-
/*
|
|
21
|
-
// Bradley, Thomas & du Toit, Jacques & Giles, Mike & Tong, Robert & Woodhams, Paul.
|
|
22
|
-
// (2011). Parallelisation Techniques for Random Number Generators.
|
|
23
|
-
// GPU Computing Gems Emerald Edition. 10.1016/B978-0-12-384988-5.00016-4
|
|
24
|
-
*/
|
|
25
|
-
|
|
26
|
-
#ifndef _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
|
|
27
|
-
#define _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
|
|
28
|
-
|
|
29
|
-
#include "oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp"
|
|
30
|
-
|
|
31
|
-
namespace oneapi::mkl::rng::device {
|
|
32
|
-
|
|
33
|
-
template <std::int32_t VecSize = 1>
|
|
34
|
-
class mrg32k3a;
|
|
35
|
-
|
|
36
|
-
namespace detail {
|
|
37
|
-
|
|
38
|
-
template <std::int32_t VecSize>
|
|
39
|
-
struct engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
|
|
40
|
-
std::uint32_t s[6];
|
|
41
|
-
};
|
|
42
|
-
|
|
43
|
-
namespace mrg32k3a_impl {
|
|
44
|
-
|
|
45
|
-
// Compile-time constants of the mrg32k3a recurrence.
struct mrg32k3a_params {
    // Moduli of the two state components.
    static constexpr std::uint32_t m1 = 4294967087u;
    static constexpr std::uint32_t m2 = 4294944443u;

    // Multipliers applied with a positive sign.
    static constexpr std::uint32_t a12 = 1403580u;
    static constexpr std::uint32_t a21 = 527612u;

    // Multipliers applied with a negative sign (subtracted in generate()).
    static constexpr std::uint32_t a13n = 810728u;
    static constexpr std::uint32_t a23n = 1370589u;

    // Non-negative residues of the negated multipliers modulo m1 / m2.
    static constexpr std::uint32_t a13 = m1 - a13n; // 4294156359
    static constexpr std::uint32_t a23 = m2 - a23n; // 4293573854
};
|
|
55
|
-
|
|
56
|
-
template <std::uint32_t M>
|
|
57
|
-
struct two_pow_32_minus_m {};
|
|
58
|
-
|
|
59
|
-
template <>
|
|
60
|
-
struct two_pow_32_minus_m<mrg32k3a_params::m1> {
|
|
61
|
-
static constexpr std::int64_t val = 209;
|
|
62
|
-
};
|
|
63
|
-
|
|
64
|
-
template <>
|
|
65
|
-
struct two_pow_32_minus_m<mrg32k3a_params::m2> {
|
|
66
|
-
static constexpr std::int64_t val = 22853;
|
|
67
|
-
};
|
|
68
|
-
|
|
69
|
-
template <std::int64_t M, typename T>
|
|
70
|
-
static inline void bit_shift_and_mask(T& in) {
|
|
71
|
-
T mask;
|
|
72
|
-
if constexpr (std::is_same_v<T, std::uint64_t>) {
|
|
73
|
-
mask = 0x00000000ffffffffu;
|
|
74
|
-
}
|
|
75
|
-
else {
|
|
76
|
-
mask = 0x00000000ffffffff;
|
|
77
|
-
}
|
|
78
|
-
in = ((in >> 32) * two_pow_32_minus_m<M>::val + (in & mask));
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
template <std::uint32_t M>
|
|
82
|
-
static inline void matr3x3_vec_mul_mod(std::uint32_t a[3][3], std::uint32_t x[3],
|
|
83
|
-
std::uint32_t y[3]) {
|
|
84
|
-
std::uint64_t temp[3] = { 0ull, 0ull, 0ull };
|
|
85
|
-
for (int i = 0; i < 3; ++i) {
|
|
86
|
-
for (int k = 0; k < 3; ++k) {
|
|
87
|
-
std::uint64_t tmp =
|
|
88
|
-
static_cast<std::uint64_t>(a[i][k]) * static_cast<std::uint64_t>(x[k]);
|
|
89
|
-
bit_shift_and_mask<M>(tmp);
|
|
90
|
-
bit_shift_and_mask<M>(tmp);
|
|
91
|
-
if (tmp >= M) {
|
|
92
|
-
tmp -= M;
|
|
93
|
-
}
|
|
94
|
-
temp[i] += tmp;
|
|
95
|
-
}
|
|
96
|
-
bit_shift_and_mask<M>(temp[i]);
|
|
97
|
-
if (temp[i] >= M) {
|
|
98
|
-
temp[i] -= M;
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
for (int k = 0; k < 3; k++) {
|
|
103
|
-
y[k] = static_cast<std::uint32_t>(temp[k]);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
return;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
template <std::uint32_t M>
|
|
110
|
-
static inline void matr3x3_mul_mod(std::uint32_t B[3][3],
|
|
111
|
-
const std::uint32_t _skip_ahead_matrix[3][3]) {
|
|
112
|
-
std::uint64_t temp[3][3] = { { 0ull, 0ull, 0ull }, { 0ull, 0ull, 0ull }, { 0ull, 0ull, 0ull } };
|
|
113
|
-
|
|
114
|
-
for (int i = 0; i < 3; ++i) {
|
|
115
|
-
for (int j = 0; j < 3; ++j) {
|
|
116
|
-
for (int k = 0; k < 3; ++k) {
|
|
117
|
-
std::uint64_t tmp = static_cast<std::uint64_t>(B[i][k]) *
|
|
118
|
-
static_cast<std::uint64_t>(_skip_ahead_matrix[k][j]);
|
|
119
|
-
bit_shift_and_mask<M>(tmp);
|
|
120
|
-
if constexpr (mrg32k3a_params::m2 == M) {
|
|
121
|
-
bit_shift_and_mask<M>(tmp);
|
|
122
|
-
}
|
|
123
|
-
if (tmp >= M) {
|
|
124
|
-
tmp -= M;
|
|
125
|
-
}
|
|
126
|
-
temp[i][j] += tmp;
|
|
127
|
-
}
|
|
128
|
-
bit_shift_and_mask<M>(temp[i][j]);
|
|
129
|
-
if (temp[i][j] >= M) {
|
|
130
|
-
temp[i][j] -= M;
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
for (int i = 0; i < 3; ++i) {
|
|
136
|
-
for (int j = 0; j < 3; ++j) {
|
|
137
|
-
B[i][j] = static_cast<std::uint32_t>(temp[i][j]);
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
template <std::uint32_t M>
|
|
143
|
-
static inline void vec3_pow_mod(
|
|
144
|
-
std::uint32_t x[3], std::uint64_t n, const std::uint64_t* skip_params,
|
|
145
|
-
const std::uint32_t _skip_ahead_matrix[quantity_of_3x3_matrices][3][3]) {
|
|
146
|
-
std::uint32_t B[3][3] = { { 1u, 0u, 0u }, { 0u, 1u, 0u }, { 0u, 0u, 1u } };
|
|
147
|
-
|
|
148
|
-
std::uint32_t off;
|
|
149
|
-
std::uint32_t mod;
|
|
150
|
-
std::uint64_t skip_param;
|
|
151
|
-
std::uint32_t bit_count = 0; // can be 0, 1, 2
|
|
152
|
-
std::uint32_t bit_count_tmp;
|
|
153
|
-
|
|
154
|
-
for (std::uint32_t j = 0; j < n; j++) {
|
|
155
|
-
skip_param = skip_params[j];
|
|
156
|
-
off = 0;
|
|
157
|
-
bit_count_tmp = bit_count;
|
|
158
|
-
while (skip_param) {
|
|
159
|
-
// we have to multiply skip_param[1] by 2 and skip_params[2] by 4 only for the 1st iteration
|
|
160
|
-
// of the loop to get the required power of a power-of-eight matrice from a power of two
|
|
161
|
-
mod = (skip_param << static_cast<std::uint64_t>(bit_count_tmp)) &
|
|
162
|
-
7ull; // == (skip_param * _mult) % 8, _mult={1,2,4}
|
|
163
|
-
if (mod) {
|
|
164
|
-
// 7 - number of 3x3 matrices of some power of 8: 1*8^x, 2*8^x, ..., 7*8^x
|
|
165
|
-
// 7 * 21 - number of 3x3 matrices for each skip parameter
|
|
166
|
-
matr3x3_mul_mod<M>(B, _skip_ahead_matrix[7 * 21 * j + off * 7 + (mod - 1)]);
|
|
167
|
-
}
|
|
168
|
-
skip_param =
|
|
169
|
-
skip_param /
|
|
170
|
-
(8ull >> static_cast<std::uint64_t>(bit_count_tmp)); // == skip_param / (8 / _mult)
|
|
171
|
-
++off;
|
|
172
|
-
bit_count_tmp = 0;
|
|
173
|
-
}
|
|
174
|
-
++bit_count;
|
|
175
|
-
}
|
|
176
|
-
matr3x3_vec_mul_mod<M>(B, x, x);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
template <std::int32_t VecSize>
|
|
180
|
-
static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state,
|
|
181
|
-
std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
|
|
182
|
-
if (n > 3) {
|
|
183
|
-
n = 3;
|
|
184
|
-
#ifndef __SYCL_DEVICE_ONLY__
|
|
185
|
-
throw oneapi::mkl::invalid_argument("rng", "mrg32k3a",
|
|
186
|
-
"period is 2 ^ 191, skip on more than 2^192");
|
|
187
|
-
#endif
|
|
188
|
-
}
|
|
189
|
-
vec3_pow_mod<mrg32k3a_params::m1>(state.s, n, num_to_skip_ptr, skip_ahead_matrix[0]);
|
|
190
|
-
vec3_pow_mod<mrg32k3a_params::m2>(state.s + 3, n, num_to_skip_ptr, skip_ahead_matrix[1]);
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
template <std::int32_t VecSize>
|
|
194
|
-
static inline void validate_seed(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
|
|
195
|
-
int i;
|
|
196
|
-
for (i = 0; i < 3; i++) {
|
|
197
|
-
if (state.s[i] >= mrg32k3a_params::m1) {
|
|
198
|
-
state.s[i] -= mrg32k3a_params::m1;
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
for (; i < 6; i++) {
|
|
202
|
-
if (state.s[i] >= mrg32k3a_params::m2) {
|
|
203
|
-
state.s[i] -= mrg32k3a_params::m2;
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
if ((state.s[0]) == 0 && (state.s[1]) == 0 && (state.s[2]) == 0) {
|
|
208
|
-
state.s[0] = 1;
|
|
209
|
-
}
|
|
210
|
-
if ((state.s[3]) == 0 && (state.s[4]) == 0 && (state.s[5]) == 0) {
|
|
211
|
-
state.s[3] = 1;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
template <std::int32_t VecSize>
|
|
216
|
-
static inline void init(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state,
|
|
217
|
-
std::uint64_t n, const std::uint32_t* seed_ptr, std::uint64_t n_offset,
|
|
218
|
-
const std::uint64_t* offset_ptr) {
|
|
219
|
-
std::uint64_t i;
|
|
220
|
-
if (n > 6) {
|
|
221
|
-
n = 6;
|
|
222
|
-
}
|
|
223
|
-
for (i = 0; i < n; i++) {
|
|
224
|
-
state.s[i] = seed_ptr[i];
|
|
225
|
-
}
|
|
226
|
-
for (; i < 6; i++) {
|
|
227
|
-
state.s[i] = 1;
|
|
228
|
-
}
|
|
229
|
-
validate_seed(state);
|
|
230
|
-
mrg32k3a_impl::skip_ahead(state, n_offset, offset_ptr);
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
template <std::int32_t VecSize>
|
|
234
|
-
static inline sycl::vec<std::uint32_t, VecSize> generate(
|
|
235
|
-
engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
|
|
236
|
-
const std::int32_t num_elements = VecSize;
|
|
237
|
-
sycl::vec<std::uint32_t, VecSize> res;
|
|
238
|
-
std::int64_t x, y;
|
|
239
|
-
std::int32_t i = 0;
|
|
240
|
-
for (i = 0; i < num_elements; i++) {
|
|
241
|
-
x = mrg32k3a_params::a12 * static_cast<std::int64_t>(state.s[1]) -
|
|
242
|
-
mrg32k3a_params::a13n * static_cast<std::int64_t>(state.s[0]);
|
|
243
|
-
// perform modulus
|
|
244
|
-
bit_shift_and_mask<mrg32k3a_params::m1>(x);
|
|
245
|
-
if (x >= mrg32k3a_params::m1)
|
|
246
|
-
x -= mrg32k3a_params::m1;
|
|
247
|
-
x += ((x & 0x8000000000000000) >> 63) * mrg32k3a_params::m1;
|
|
248
|
-
y = mrg32k3a_params::a21 * static_cast<std::int64_t>(state.s[5]) -
|
|
249
|
-
mrg32k3a_params::a23n * static_cast<std::int64_t>(state.s[3]);
|
|
250
|
-
// perform modulus
|
|
251
|
-
bit_shift_and_mask<mrg32k3a_params::m2>(y);
|
|
252
|
-
bit_shift_and_mask<mrg32k3a_params::m2>(y);
|
|
253
|
-
if (y >= mrg32k3a_params::m2)
|
|
254
|
-
y -= mrg32k3a_params::m2;
|
|
255
|
-
y += ((y & 0x8000000000000000) >> 63) * mrg32k3a_params::m2;
|
|
256
|
-
state.s[0] = state.s[1];
|
|
257
|
-
state.s[1] = state.s[2];
|
|
258
|
-
state.s[2] = x;
|
|
259
|
-
state.s[3] = state.s[4];
|
|
260
|
-
state.s[4] = state.s[5];
|
|
261
|
-
state.s[5] = y;
|
|
262
|
-
if (x <= y) {
|
|
263
|
-
res[i] = x + (mrg32k3a_params::m1 - y);
|
|
264
|
-
}
|
|
265
|
-
else {
|
|
266
|
-
res[i] = x - y;
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
return res;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
template <std::int32_t VecSize>
|
|
273
|
-
static inline std::uint32_t generate_single(
|
|
274
|
-
engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
|
|
275
|
-
std::uint32_t res;
|
|
276
|
-
std::int64_t x, y;
|
|
277
|
-
x = mrg32k3a_params::a12 * static_cast<std::int64_t>(state.s[1]) -
|
|
278
|
-
mrg32k3a_params::a13n * static_cast<std::int64_t>(state.s[0]);
|
|
279
|
-
// perform modulus
|
|
280
|
-
bit_shift_and_mask<mrg32k3a_params::m1>(x);
|
|
281
|
-
if (x >= mrg32k3a_params::m1)
|
|
282
|
-
x -= mrg32k3a_params::m1;
|
|
283
|
-
x += ((x & 0x8000000000000000) >> 63) * mrg32k3a_params::m1;
|
|
284
|
-
y = mrg32k3a_params::a21 * static_cast<std::int64_t>(state.s[5]) -
|
|
285
|
-
mrg32k3a_params::a23n * static_cast<std::int64_t>(state.s[3]);
|
|
286
|
-
// perform modulus
|
|
287
|
-
bit_shift_and_mask<mrg32k3a_params::m2>(y);
|
|
288
|
-
bit_shift_and_mask<mrg32k3a_params::m2>(y);
|
|
289
|
-
if (y >= mrg32k3a_params::m2)
|
|
290
|
-
y -= mrg32k3a_params::m2;
|
|
291
|
-
y += ((y & 0x8000000000000000) >> 63) * mrg32k3a_params::m2;
|
|
292
|
-
state.s[0] = state.s[1];
|
|
293
|
-
state.s[1] = state.s[2];
|
|
294
|
-
state.s[2] = x;
|
|
295
|
-
state.s[3] = state.s[4];
|
|
296
|
-
state.s[4] = state.s[5];
|
|
297
|
-
state.s[5] = y;
|
|
298
|
-
if (x <= y) {
|
|
299
|
-
res = x + (mrg32k3a_params::m1 - y);
|
|
300
|
-
}
|
|
301
|
-
else {
|
|
302
|
-
res = x - y;
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
return res;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
} // namespace mrg32k3a_impl
|
|
309
|
-
|
|
310
|
-
template <std::int32_t VecSize>
|
|
311
|
-
class engine_base<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
|
|
312
|
-
protected:
|
|
313
|
-
engine_base(std::uint32_t seed, std::uint64_t offset = 0) {
|
|
314
|
-
mrg32k3a_impl::init(this->state_, 1, &seed, 1, &offset);
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
engine_base(std::uint64_t n, const std::uint32_t* seed, std::uint64_t offset = 0) {
|
|
318
|
-
mrg32k3a_impl::init(this->state_, n, seed, 1, &offset);
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
engine_base(std::uint32_t seed, std::uint64_t n_offset, const std::uint64_t* offset_ptr) {
|
|
322
|
-
mrg32k3a_impl::init(this->state_, 1, &seed, n_offset, offset_ptr);
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
engine_base(std::uint64_t n, const std::uint32_t* seed, std::uint64_t n_offset,
|
|
326
|
-
const std::uint64_t* offset_ptr) {
|
|
327
|
-
mrg32k3a_impl::init(this->state_, n, seed, n_offset, offset_ptr);
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
template <typename RealType>
|
|
331
|
-
auto generate(RealType a, RealType b) ->
|
|
332
|
-
typename std::conditional<VecSize == 1, RealType, sycl::vec<RealType, VecSize>>::type {
|
|
333
|
-
sycl::vec<RealType, VecSize> res;
|
|
334
|
-
sycl::vec<std::uint32_t, VecSize> res_uint;
|
|
335
|
-
RealType c;
|
|
336
|
-
|
|
337
|
-
c = (b - a) / (static_cast<RealType>(mrg32k3a_impl::mrg32k3a_params::m1));
|
|
338
|
-
|
|
339
|
-
res_uint = mrg32k3a_impl::generate(this->state_);
|
|
340
|
-
|
|
341
|
-
for (int i = 0; i < VecSize; i++) {
|
|
342
|
-
res[i] = (RealType)(res_uint[i]) * c + a;
|
|
343
|
-
}
|
|
344
|
-
return res;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
auto generate() -> typename std::conditional<VecSize == 1, std::uint32_t,
|
|
348
|
-
sycl::vec<std::uint32_t, VecSize>>::type {
|
|
349
|
-
return mrg32k3a_impl::generate(this->state_);
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
template <typename RealType>
|
|
353
|
-
RealType generate_single(RealType a, RealType b) {
|
|
354
|
-
RealType res;
|
|
355
|
-
std::uint32_t res_uint;
|
|
356
|
-
RealType c;
|
|
357
|
-
|
|
358
|
-
c = (b - a) / (static_cast<RealType>(mrg32k3a_impl::mrg32k3a_params::m1));
|
|
359
|
-
|
|
360
|
-
res_uint = mrg32k3a_impl::generate_single(this->state_);
|
|
361
|
-
|
|
362
|
-
res = (RealType)(res_uint)*c + a;
|
|
363
|
-
|
|
364
|
-
return res;
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
std::uint32_t generate_single() {
|
|
368
|
-
return mrg32k3a_impl::generate_single(this->state_);
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
void skip_ahead(std::uint64_t num_to_skip) {
|
|
372
|
-
detail::mrg32k3a_impl::skip_ahead(this->state_, 1, &num_to_skip);
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) {
|
|
376
|
-
detail::mrg32k3a_impl::skip_ahead(this->state_, num_to_skip.size(), num_to_skip.begin());
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>> state_;
|
|
380
|
-
};
|
|
381
|
-
|
|
382
|
-
} // namespace detail
|
|
383
|
-
} // namespace oneapi::mkl::rng::device
|
|
384
|
-
|
|
385
|
-
#endif // _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
|