mkl-devel-dpcpp 2025.0.1__py2.py3-none-win_amd64.whl → 2025.2.0__py2.py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkl-devel-dpcpp might be problematic. Click here for more details.
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
- mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/METADATA +4 -3
- mkl_devel_dpcpp-2025.2.0.dist-info/RECORD +15 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -26
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -880
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/types.hpp +0 -60
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -42
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -1240
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas.hpp +0 -33
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dft.hpp +0 -253
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dfti.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/exceptions.hpp +0 -110
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -67
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -68
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -177
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/export.hpp +0 -25
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -55
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -75
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/lapack.hpp +0 -1095
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -106
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack.hpp +0 -23
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -48
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -89
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +0 -464
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -71
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -43
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -54
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +0 -285
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -270
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -105
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -117
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +0 -223
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -118
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +0 -266
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -125
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -385
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -3668
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -141
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -552
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -355
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -58
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -51
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -289
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +0 -183
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +0 -637
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -116
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engines.hpp +0 -187
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -59
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/types.hpp +0 -74
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device.hpp +0 -29
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/distributions.hpp +0 -1913
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -788
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -163
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng.hpp +0 -22
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +0 -111
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +0 -446
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +0 -193
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas.hpp +0 -32
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/stats.hpp +0 -356
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/types.hpp +0 -321
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -3529
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -280
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -81
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -1059
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -861
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -860
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -4608
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -8963
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -460
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/span.hpp +0 -3813
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -3581
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm.hpp +0 -30
- mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl.hpp +0 -34
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
- mkl_devel_dpcpp-2025.0.1.dist-info/RECORD +0 -90
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/LICENSE.txt +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/WHEEL +0 -0
- {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
/*******************************************************************************
|
|
2
|
-
* Copyright 2023 Intel Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing,
|
|
11
|
-
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions
|
|
14
|
-
* and limitations under the License.
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
-
*******************************************************************************/
|
|
19
|
-
|
|
20
|
-
#ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
|
|
21
|
-
#define _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include "oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp"
|
|
24
|
-
|
|
25
|
-
namespace oneapi::mkl::rng::device::detail {
|
|
26
|
-
namespace philox4x32x10_impl {
|
|
27
|
-
|
|
28
|
-
template <std::int32_t VecSize>
|
|
29
|
-
static inline void init(
|
|
30
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
|
|
31
|
-
const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
|
|
32
|
-
std::size_t num_elements_acc =
|
|
33
|
-
sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
|
|
34
|
-
sizeof(std::uint32_t);
|
|
35
|
-
state.key[0] = accessor[id * num_elements_acc];
|
|
36
|
-
state.key[1] = accessor[id * num_elements_acc + 1];
|
|
37
|
-
state.counter[0] = accessor[id * num_elements_acc + 2];
|
|
38
|
-
state.counter[1] = accessor[id * num_elements_acc + 3];
|
|
39
|
-
state.counter[2] = accessor[id * num_elements_acc + 4];
|
|
40
|
-
state.counter[3] = accessor[id * num_elements_acc + 5];
|
|
41
|
-
|
|
42
|
-
state.part = accessor[id * num_elements_acc + 6];
|
|
43
|
-
|
|
44
|
-
state.result[0] = accessor[id * num_elements_acc + 7];
|
|
45
|
-
state.result[1] = accessor[id * num_elements_acc + 8];
|
|
46
|
-
state.result[2] = accessor[id * num_elements_acc + 9];
|
|
47
|
-
state.result[3] = accessor[id * num_elements_acc + 10];
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
template <std::int32_t VecSize>
|
|
51
|
-
static inline void store(
|
|
52
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
|
|
53
|
-
const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
|
|
54
|
-
std::size_t num_elements_acc =
|
|
55
|
-
sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
|
|
56
|
-
sizeof(std::uint32_t);
|
|
57
|
-
accessor[id * num_elements_acc] = state.key[0];
|
|
58
|
-
accessor[id * num_elements_acc + 1] = state.key[1];
|
|
59
|
-
accessor[id * num_elements_acc + 2] = state.counter[0];
|
|
60
|
-
accessor[id * num_elements_acc + 3] = state.counter[1];
|
|
61
|
-
accessor[id * num_elements_acc + 4] = state.counter[2];
|
|
62
|
-
accessor[id * num_elements_acc + 5] = state.counter[3];
|
|
63
|
-
accessor[id * num_elements_acc + 6] = state.part;
|
|
64
|
-
accessor[id * num_elements_acc + 7] = state.result[0];
|
|
65
|
-
accessor[id * num_elements_acc + 8] = state.result[1];
|
|
66
|
-
accessor[id * num_elements_acc + 9] = state.result[2];
|
|
67
|
-
accessor[id * num_elements_acc + 10] = state.result[3];
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
} // namespace philox4x32x10_impl
|
|
71
|
-
|
|
72
|
-
template <std::int32_t VecSize>
|
|
73
|
-
class engine_accessor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
|
|
74
|
-
public:
|
|
75
|
-
engine_accessor_base(sycl::buffer<std::uint32_t, 1>& state_buf, sycl::handler& cgh)
|
|
76
|
-
: states_accessor_(state_buf, cgh) {}
|
|
77
|
-
|
|
78
|
-
oneapi::mkl::rng::device::philox4x32x10<VecSize> load(std::size_t id) const {
|
|
79
|
-
oneapi::mkl::rng::device::philox4x32x10<VecSize> engine;
|
|
80
|
-
philox4x32x10_impl::init(engine.state_, id, states_accessor_);
|
|
81
|
-
return engine;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
void store(oneapi::mkl::rng::device::philox4x32x10<VecSize>& engine, std::size_t id) const {
|
|
85
|
-
philox4x32x10_impl::store(engine.state_, id, states_accessor_);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
protected:
|
|
89
|
-
sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write> states_accessor_;
|
|
90
|
-
};
|
|
91
|
-
|
|
92
|
-
template <std::int32_t VecSize>
|
|
93
|
-
class engine_descriptor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
|
|
94
|
-
public:
|
|
95
|
-
using engine_type = oneapi::mkl::rng::device::philox4x32x10<VecSize>;
|
|
96
|
-
|
|
97
|
-
using accessor_type =
|
|
98
|
-
oneapi::mkl::rng::device::engine_accessor<oneapi::mkl::rng::device::philox4x32x10<VecSize>>;
|
|
99
|
-
|
|
100
|
-
engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, std::uint64_t seed,
|
|
101
|
-
std::uint64_t offset)
|
|
102
|
-
: states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
|
|
103
|
-
sizeof(std::uint32_t)) {
|
|
104
|
-
queue.submit([&](sycl::handler& cgh) {
|
|
105
|
-
accessor_type states_accessor(states_buffer_, cgh);
|
|
106
|
-
|
|
107
|
-
cgh.parallel_for<class init_kernel<engine_type>>
|
|
108
|
-
(range, [=](sycl::item<1> item) {
|
|
109
|
-
std::size_t id = item.get_id(0);
|
|
110
|
-
oneapi::mkl::rng::device::philox4x32x10<VecSize> engine(seed, offset* id);
|
|
111
|
-
states_accessor.store(engine, id);
|
|
112
|
-
});
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
template <typename InitEngineFunc>
|
|
117
|
-
engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, InitEngineFunc func)
|
|
118
|
-
: states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
|
|
119
|
-
sizeof(std::uint32_t)) {
|
|
120
|
-
queue.submit([&](sycl::handler& cgh) {
|
|
121
|
-
accessor_type states_accessor(states_buffer_, cgh);
|
|
122
|
-
|
|
123
|
-
cgh.parallel_for<class init_kernel_ex<engine_type>>
|
|
124
|
-
(range, [=](sycl::item<1> item) {
|
|
125
|
-
std::size_t id = item.get_id(0);
|
|
126
|
-
states_accessor.store(func(item), id);
|
|
127
|
-
});
|
|
128
|
-
});
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
accessor_type get_access(sycl::handler& cgh) {
|
|
132
|
-
return accessor_type{ states_buffer_, cgh };
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
protected:
|
|
136
|
-
sycl::buffer<std::uint32_t, 1> states_buffer_;
|
|
137
|
-
};
|
|
138
|
-
|
|
139
|
-
} // namespace oneapi::mkl::rng::device::detail
|
|
140
|
-
|
|
141
|
-
#endif // _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
|
|
@@ -1,552 +0,0 @@
|
|
|
1
|
-
/*******************************************************************************
|
|
2
|
-
* Copyright 2020 Intel Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing,
|
|
11
|
-
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions
|
|
14
|
-
* and limitations under the License.
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
-
*******************************************************************************/
|
|
19
|
-
|
|
20
|
-
#ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
|
|
21
|
-
#define _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include <utility> // std::pair
|
|
24
|
-
|
|
25
|
-
namespace oneapi::mkl::rng::device {
|
|
26
|
-
|
|
27
|
-
template <std::int32_t VecSize = 1>
|
|
28
|
-
class philox4x32x10;
|
|
29
|
-
|
|
30
|
-
namespace detail {
|
|
31
|
-
|
|
32
|
-
template <std::int32_t VecSize>
|
|
33
|
-
struct engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
|
|
34
|
-
std::uint32_t key[2];
|
|
35
|
-
std::uint32_t counter[4];
|
|
36
|
-
std::uint32_t part;
|
|
37
|
-
std::uint32_t result[4];
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
namespace philox4x32x10_impl {
|
|
41
|
-
|
|
42
|
-
static inline void add128(std::uint32_t* a, std::uint64_t b) {
|
|
43
|
-
std::uint64_t tmp = ((static_cast<std::uint64_t>(a[1]) << 32) | a[0]);
|
|
44
|
-
|
|
45
|
-
tmp += b;
|
|
46
|
-
|
|
47
|
-
a[0] = static_cast<std::uint32_t>(tmp);
|
|
48
|
-
a[1] = static_cast<std::uint32_t>(tmp >> 32);
|
|
49
|
-
|
|
50
|
-
if (tmp < b) {
|
|
51
|
-
tmp = ((static_cast<std::uint64_t>(a[3]) << 32) | a[2]) + 1;
|
|
52
|
-
|
|
53
|
-
a[2] = static_cast<std::uint32_t>(tmp);
|
|
54
|
-
a[3] = static_cast<std::uint32_t>(tmp >> 32);
|
|
55
|
-
}
|
|
56
|
-
return;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
static inline void add128_1(std::uint32_t* a) {
|
|
60
|
-
if (++a[0]) {
|
|
61
|
-
return;
|
|
62
|
-
}
|
|
63
|
-
if (++a[1]) {
|
|
64
|
-
return;
|
|
65
|
-
}
|
|
66
|
-
if (++a[2]) {
|
|
67
|
-
return;
|
|
68
|
-
}
|
|
69
|
-
++a[3];
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
static inline std::pair<std::uint32_t, std::uint32_t> mul_hilo_32(std::uint32_t a,
|
|
73
|
-
std::uint32_t b) {
|
|
74
|
-
std::uint64_t res_64 = static_cast<std::uint64_t>(a) * static_cast<std::uint64_t>(b);
|
|
75
|
-
return std::make_pair(static_cast<std::uint32_t>(res_64),
|
|
76
|
-
static_cast<std::uint32_t>(res_64 >> 32));
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
static inline void round(std::uint32_t* cnt, std::uint32_t* k) {
|
|
80
|
-
auto [L0, H0] = mul_hilo_32(0xD2511F53, cnt[0]);
|
|
81
|
-
auto [L1, H1] = mul_hilo_32(0xCD9E8D57, cnt[2]);
|
|
82
|
-
|
|
83
|
-
cnt[0] = H1 ^ cnt[1] ^ k[0];
|
|
84
|
-
cnt[1] = L1;
|
|
85
|
-
cnt[2] = H0 ^ cnt[3] ^ k[1];
|
|
86
|
-
cnt[3] = L0;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
static inline void round_10(std::uint32_t* cnt, std::uint32_t* k) {
|
|
90
|
-
round(cnt, k); // 1
|
|
91
|
-
// increasing keys with philox4x32x10 constants
|
|
92
|
-
k[0] += 0x9E3779B9;
|
|
93
|
-
k[1] += 0xBB67AE85;
|
|
94
|
-
round(cnt, k); // 2
|
|
95
|
-
k[0] += 0x9E3779B9;
|
|
96
|
-
k[1] += 0xBB67AE85;
|
|
97
|
-
round(cnt, k); // 3
|
|
98
|
-
k[0] += 0x9E3779B9;
|
|
99
|
-
k[1] += 0xBB67AE85;
|
|
100
|
-
round(cnt, k); // 4
|
|
101
|
-
k[0] += 0x9E3779B9;
|
|
102
|
-
k[1] += 0xBB67AE85;
|
|
103
|
-
round(cnt, k); // 5
|
|
104
|
-
k[0] += 0x9E3779B9;
|
|
105
|
-
k[1] += 0xBB67AE85;
|
|
106
|
-
round(cnt, k); // 6
|
|
107
|
-
k[0] += 0x9E3779B9;
|
|
108
|
-
k[1] += 0xBB67AE85;
|
|
109
|
-
round(cnt, k); // 7
|
|
110
|
-
k[0] += 0x9E3779B9;
|
|
111
|
-
k[1] += 0xBB67AE85;
|
|
112
|
-
round(cnt, k); // 8
|
|
113
|
-
k[0] += 0x9E3779B9;
|
|
114
|
-
k[1] += 0xBB67AE85;
|
|
115
|
-
round(cnt, k); // 9
|
|
116
|
-
k[0] += 0x9E3779B9;
|
|
117
|
-
k[1] += 0xBB67AE85;
|
|
118
|
-
round(cnt, k); // 10
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
template <std::int32_t VecSize>
|
|
122
|
-
static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
|
|
123
|
-
std::uint64_t num_to_skip) {
|
|
124
|
-
std::uint64_t num_to_skip_tmp = num_to_skip;
|
|
125
|
-
std::uint64_t c_inc;
|
|
126
|
-
std::uint32_t counter[4];
|
|
127
|
-
std::uint32_t key[2];
|
|
128
|
-
std::uint64_t tail;
|
|
129
|
-
if (num_to_skip_tmp <= state.part) {
|
|
130
|
-
state.part -= num_to_skip_tmp;
|
|
131
|
-
}
|
|
132
|
-
else {
|
|
133
|
-
tail = num_to_skip % 4;
|
|
134
|
-
if ((tail == 0) && (state.part == 0)) {
|
|
135
|
-
add128(state.counter, num_to_skip / 4);
|
|
136
|
-
}
|
|
137
|
-
else {
|
|
138
|
-
num_to_skip_tmp = num_to_skip_tmp - state.part;
|
|
139
|
-
state.part = 0;
|
|
140
|
-
c_inc = (num_to_skip_tmp - 1) / 4;
|
|
141
|
-
state.part = (4 - num_to_skip_tmp % 4) % 4;
|
|
142
|
-
add128(state.counter, c_inc);
|
|
143
|
-
counter[0] = state.counter[0];
|
|
144
|
-
counter[1] = state.counter[1];
|
|
145
|
-
counter[2] = state.counter[2];
|
|
146
|
-
counter[3] = state.counter[3];
|
|
147
|
-
key[0] = state.key[0];
|
|
148
|
-
key[1] = state.key[1];
|
|
149
|
-
round_10(counter, key);
|
|
150
|
-
state.result[0] = counter[0];
|
|
151
|
-
state.result[1] = counter[1];
|
|
152
|
-
state.result[2] = counter[2];
|
|
153
|
-
state.result[3] = counter[3];
|
|
154
|
-
add128_1(state.counter);
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
template <std::int32_t VecSize>
|
|
160
|
-
static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
|
|
161
|
-
std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
|
|
162
|
-
constexpr std::uint64_t uint_max = 0xFFFFFFFFFFFFFFFF;
|
|
163
|
-
std::uint64_t post_buffer, pre_buffer;
|
|
164
|
-
std::int32_t num_elements = 0;
|
|
165
|
-
std::int32_t remained_counter;
|
|
166
|
-
std::uint64_t tmp_skip_array[3] = { 0, 0, 0 };
|
|
167
|
-
|
|
168
|
-
for (std::uint64_t i = 0; (i < 3) && (i < n); i++) {
|
|
169
|
-
tmp_skip_array[i] = num_to_skip_ptr[i];
|
|
170
|
-
if (tmp_skip_array[i]) {
|
|
171
|
-
num_elements = i + 1;
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
if (num_elements == 0) {
|
|
176
|
-
return;
|
|
177
|
-
}
|
|
178
|
-
if ((num_elements == 1) && (tmp_skip_array[0] <= state.part)) {
|
|
179
|
-
state.part -= static_cast<std::uint32_t>(tmp_skip_array[0]);
|
|
180
|
-
return;
|
|
181
|
-
}
|
|
182
|
-
std::uint32_t counter[4];
|
|
183
|
-
std::uint32_t key[2];
|
|
184
|
-
|
|
185
|
-
if ((tmp_skip_array[0] - state.part) <= tmp_skip_array[0]) {
|
|
186
|
-
tmp_skip_array[0] = tmp_skip_array[0] - state.part;
|
|
187
|
-
}
|
|
188
|
-
else if ((num_elements == 2) || (tmp_skip_array[1] - 1 < tmp_skip_array[1])) {
|
|
189
|
-
tmp_skip_array[1] = tmp_skip_array[1] - 1;
|
|
190
|
-
tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
|
|
191
|
-
}
|
|
192
|
-
else {
|
|
193
|
-
tmp_skip_array[2] = tmp_skip_array[2] - 1;
|
|
194
|
-
tmp_skip_array[1] = uint_max - 1;
|
|
195
|
-
tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
state.part = 0;
|
|
199
|
-
|
|
200
|
-
post_buffer = 0;
|
|
201
|
-
|
|
202
|
-
remained_counter = static_cast<std::uint32_t>(tmp_skip_array[0] % 4);
|
|
203
|
-
|
|
204
|
-
for (int i = num_elements - 1; i >= 0; i--) {
|
|
205
|
-
pre_buffer = (tmp_skip_array[i] << 62);
|
|
206
|
-
tmp_skip_array[i] >>= 2;
|
|
207
|
-
tmp_skip_array[i] |= post_buffer;
|
|
208
|
-
post_buffer = pre_buffer;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
state.part = 4 - remained_counter;
|
|
212
|
-
|
|
213
|
-
std::uint64_t counter64[] = { state.counter[1], state.counter[3] };
|
|
214
|
-
counter64[0] = ((counter64[0] << 32ull) | state.counter[0]);
|
|
215
|
-
counter64[1] = ((counter64[1] << 32ull) | state.counter[2]);
|
|
216
|
-
|
|
217
|
-
counter64[0] += tmp_skip_array[0];
|
|
218
|
-
|
|
219
|
-
if (counter64[0] < tmp_skip_array[0]) {
|
|
220
|
-
counter64[1]++;
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
counter64[1] += tmp_skip_array[1];
|
|
224
|
-
|
|
225
|
-
counter[0] = static_cast<std::uint32_t>(counter64[0]);
|
|
226
|
-
counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
|
|
227
|
-
counter[2] = static_cast<std::uint32_t>(counter64[1]);
|
|
228
|
-
counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
|
|
229
|
-
|
|
230
|
-
key[0] = state.key[0];
|
|
231
|
-
key[1] = state.key[1];
|
|
232
|
-
|
|
233
|
-
round_10(counter, key);
|
|
234
|
-
|
|
235
|
-
state.result[0] = counter[0];
|
|
236
|
-
state.result[1] = counter[1];
|
|
237
|
-
state.result[2] = counter[2];
|
|
238
|
-
state.result[3] = counter[3];
|
|
239
|
-
|
|
240
|
-
counter64[0]++;
|
|
241
|
-
|
|
242
|
-
if (counter64[0] < 1) {
|
|
243
|
-
counter64[1]++;
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
state.counter[0] = static_cast<std::uint32_t>(counter64[0]);
|
|
247
|
-
state.counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
|
|
248
|
-
state.counter[2] = static_cast<std::uint32_t>(counter64[1]);
|
|
249
|
-
state.counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
template <std::int32_t VecSize>
|
|
253
|
-
static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
|
|
254
|
-
std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t offset) {
|
|
255
|
-
state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
|
|
256
|
-
state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
|
|
257
|
-
|
|
258
|
-
state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
|
|
259
|
-
state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
|
|
260
|
-
|
|
261
|
-
state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
|
|
262
|
-
state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
|
|
263
|
-
|
|
264
|
-
state.part = 0;
|
|
265
|
-
state.result[0] = 0;
|
|
266
|
-
state.result[1] = 0;
|
|
267
|
-
state.result[2] = 0;
|
|
268
|
-
state.result[3] = 0;
|
|
269
|
-
skip_ahead(state, offset);
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
template <std::int32_t VecSize>
|
|
273
|
-
static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
|
|
274
|
-
std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t n_offset,
|
|
275
|
-
const std::uint64_t* offset_ptr) {
|
|
276
|
-
state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
|
|
277
|
-
state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
|
|
278
|
-
|
|
279
|
-
state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
|
|
280
|
-
state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
|
|
281
|
-
|
|
282
|
-
state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
|
|
283
|
-
state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
|
|
284
|
-
|
|
285
|
-
state.part = 0;
|
|
286
|
-
state.result[0] = 0;
|
|
287
|
-
state.result[1] = 0;
|
|
288
|
-
state.result[2] = 0;
|
|
289
|
-
state.result[3] = 0;
|
|
290
|
-
skip_ahead(state, n_offset, offset_ptr);
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
// for VecSize > 4
|
|
294
|
-
template <std::int32_t VecSize>
|
|
295
|
-
__attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_full(
|
|
296
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
|
|
297
|
-
const std::int32_t num_elements = VecSize;
|
|
298
|
-
sycl::vec<std::uint32_t, VecSize> res;
|
|
299
|
-
|
|
300
|
-
std::uint32_t counter[4];
|
|
301
|
-
|
|
302
|
-
int i = 0;
|
|
303
|
-
int part = (int)state.part;
|
|
304
|
-
while (part && (i < num_elements)) {
|
|
305
|
-
res[i++] = state.result[3 - (--part)];
|
|
306
|
-
}
|
|
307
|
-
if (i == num_elements) {
|
|
308
|
-
skip_ahead(state, num_elements);
|
|
309
|
-
return res;
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
counter[0] = state.counter[0];
|
|
313
|
-
counter[1] = state.counter[1];
|
|
314
|
-
counter[2] = state.counter[2];
|
|
315
|
-
counter[3] = state.counter[3];
|
|
316
|
-
|
|
317
|
-
std::uint32_t cntTmp[4];
|
|
318
|
-
std::uint32_t keyTmp[2];
|
|
319
|
-
for (; i < num_elements; i += 4) {
|
|
320
|
-
cntTmp[0] = counter[0];
|
|
321
|
-
cntTmp[1] = counter[1];
|
|
322
|
-
cntTmp[2] = counter[2];
|
|
323
|
-
cntTmp[3] = counter[3];
|
|
324
|
-
|
|
325
|
-
keyTmp[0] = state.key[0];
|
|
326
|
-
keyTmp[1] = state.key[1];
|
|
327
|
-
|
|
328
|
-
round_10(cntTmp, keyTmp);
|
|
329
|
-
|
|
330
|
-
if (i + 4 <= num_elements) {
|
|
331
|
-
for (int j = 0; j < 4; j++) {
|
|
332
|
-
res[i + j] = cntTmp[j];
|
|
333
|
-
}
|
|
334
|
-
add128_1(counter);
|
|
335
|
-
}
|
|
336
|
-
else {
|
|
337
|
-
// here if last iteration
|
|
338
|
-
for (int j = 0; i < num_elements; i++, j++) {
|
|
339
|
-
res[i] = cntTmp[j];
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
skip_ahead(state, num_elements);
|
|
344
|
-
return res;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// for VecSize <= 4
|
|
348
|
-
template <std::int32_t VecSize>
|
|
349
|
-
__attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_small(
|
|
350
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
|
|
351
|
-
const std::int32_t num_elements = VecSize;
|
|
352
|
-
sycl::vec<std::uint32_t, VecSize> res;
|
|
353
|
-
|
|
354
|
-
std::uint32_t counter[4];
|
|
355
|
-
std::uint32_t key[2];
|
|
356
|
-
|
|
357
|
-
int i = 0;
|
|
358
|
-
int part = (int)state.part;
|
|
359
|
-
while (part && (i < num_elements)) {
|
|
360
|
-
res[i++] = state.result[3 - (--part)];
|
|
361
|
-
}
|
|
362
|
-
if (i == num_elements) {
|
|
363
|
-
skip_ahead(state, num_elements);
|
|
364
|
-
return res;
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
counter[0] = state.counter[0];
|
|
368
|
-
counter[1] = state.counter[1];
|
|
369
|
-
counter[2] = state.counter[2];
|
|
370
|
-
counter[3] = state.counter[3];
|
|
371
|
-
key[0] = state.key[0];
|
|
372
|
-
key[1] = state.key[1];
|
|
373
|
-
|
|
374
|
-
round_10(counter, key);
|
|
375
|
-
|
|
376
|
-
for (int j = 0; i < num_elements; i++, j++) {
|
|
377
|
-
res[i] = counter[j];
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
skip_ahead(state, num_elements);
|
|
381
|
-
return res;
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
template <int VecSize>
|
|
385
|
-
__attribute__((always_inline)) static inline std::uint32_t generate_single(
|
|
386
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
|
|
387
|
-
std::uint32_t res;
|
|
388
|
-
|
|
389
|
-
std::uint32_t counter[4];
|
|
390
|
-
std::uint32_t key[2];
|
|
391
|
-
|
|
392
|
-
std::int32_t part = static_cast<std::int32_t>(state.part);
|
|
393
|
-
if (part != 0) {
|
|
394
|
-
res = state.result[3 - (--part)];
|
|
395
|
-
skip_ahead(state, 1);
|
|
396
|
-
return res;
|
|
397
|
-
}
|
|
398
|
-
counter[0] = state.counter[0];
|
|
399
|
-
counter[1] = state.counter[1];
|
|
400
|
-
counter[2] = state.counter[2];
|
|
401
|
-
counter[3] = state.counter[3];
|
|
402
|
-
key[0] = state.key[0];
|
|
403
|
-
key[1] = state.key[1];
|
|
404
|
-
|
|
405
|
-
round_10(counter, key);
|
|
406
|
-
|
|
407
|
-
res = counter[0];
|
|
408
|
-
|
|
409
|
-
skip_ahead(state, 1);
|
|
410
|
-
return res;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
} // namespace philox4x32x10_impl
|
|
414
|
-
|
|
415
|
-
template <std::int32_t VecSize>
|
|
416
|
-
class engine_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
|
|
417
|
-
protected:
|
|
418
|
-
engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
|
|
419
|
-
philox4x32x10_impl::init(this->state_, 1, &seed, offset);
|
|
420
|
-
}
|
|
421
|
-
|
|
422
|
-
engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t offset = 0) {
|
|
423
|
-
philox4x32x10_impl::init(this->state_, n, seed, offset);
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
engine_base(std::uint64_t seed, std::uint64_t n_offset, const std::uint64_t* offset_ptr) {
|
|
427
|
-
philox4x32x10_impl::init(this->state_, 1, &seed, n_offset, offset_ptr);
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t n_offset,
|
|
431
|
-
const std::uint64_t* offset_ptr) {
|
|
432
|
-
philox4x32x10_impl::init(this->state_, n, seed, n_offset, offset_ptr);
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
template <typename RealType>
|
|
436
|
-
__attribute__((always_inline)) inline auto generate(RealType a, RealType b) ->
|
|
437
|
-
typename std::conditional<VecSize == 1, RealType, sycl::vec<RealType, VecSize>>::type {
|
|
438
|
-
sycl::vec<RealType, VecSize> res;
|
|
439
|
-
sycl::vec<std::uint32_t, VecSize> res_uint;
|
|
440
|
-
RealType a1;
|
|
441
|
-
RealType c1;
|
|
442
|
-
|
|
443
|
-
c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
|
|
444
|
-
a1 = (b + a) / static_cast<RealType>(2.0);
|
|
445
|
-
|
|
446
|
-
if constexpr (VecSize > 4) {
|
|
447
|
-
res_uint = philox4x32x10_impl::generate_full(this->state_);
|
|
448
|
-
}
|
|
449
|
-
else {
|
|
450
|
-
res_uint = philox4x32x10_impl::generate_small(this->state_);
|
|
451
|
-
}
|
|
452
|
-
for (int i = 0; i < VecSize; i++) {
|
|
453
|
-
res[i] = static_cast<RealType>(static_cast<std::int32_t>(res_uint[i])) * c1 + a1;
|
|
454
|
-
}
|
|
455
|
-
return res;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
__attribute__((always_inline)) inline auto generate() ->
|
|
459
|
-
typename std::conditional<VecSize == 1, std::uint32_t,
|
|
460
|
-
sycl::vec<std::uint32_t, VecSize>>::type {
|
|
461
|
-
if constexpr (VecSize > 4) {
|
|
462
|
-
return philox4x32x10_impl::generate_full(this->state_);
|
|
463
|
-
}
|
|
464
|
-
return philox4x32x10_impl::generate_small(this->state_);
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
template <typename UIntType>
|
|
468
|
-
__attribute__((always_inline)) inline auto generate_uniform_bits() ->
|
|
469
|
-
typename std::conditional<VecSize == 1, UIntType, sycl::vec<UIntType, VecSize>>::type {
|
|
470
|
-
if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
|
|
471
|
-
return generate();
|
|
472
|
-
}
|
|
473
|
-
else {
|
|
474
|
-
auto uni_res1 = generate();
|
|
475
|
-
auto uni_res2 = generate();
|
|
476
|
-
|
|
477
|
-
if constexpr (VecSize == 1) {
|
|
478
|
-
return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
|
|
479
|
-
}
|
|
480
|
-
else {
|
|
481
|
-
sycl::vec<std::uint64_t, VecSize> vec_out;
|
|
482
|
-
|
|
483
|
-
if constexpr (VecSize != 3) {
|
|
484
|
-
for (int i = 0; i < VecSize / 2; i++) {
|
|
485
|
-
vec_out[i] = (static_cast<std::uint64_t>(uni_res1[2 * i + 1]) << 32) +
|
|
486
|
-
uni_res1[2 * i];
|
|
487
|
-
vec_out[i + VecSize / 2] =
|
|
488
|
-
(static_cast<std::uint64_t>(uni_res2[2 * i + 1]) << 32) +
|
|
489
|
-
uni_res2[2 * i];
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
else {
|
|
493
|
-
vec_out[0] = (static_cast<std::uint64_t>(uni_res1[1]) << 32) + uni_res1[0];
|
|
494
|
-
vec_out[1] = (static_cast<std::uint64_t>(uni_res2[0]) << 32) + uni_res1[2];
|
|
495
|
-
vec_out[2] = (static_cast<std::uint64_t>(uni_res2[2]) << 32) + uni_res2[1];
|
|
496
|
-
}
|
|
497
|
-
|
|
498
|
-
return vec_out;
|
|
499
|
-
}
|
|
500
|
-
}
|
|
501
|
-
}
|
|
502
|
-
|
|
503
|
-
template <typename RealType>
|
|
504
|
-
RealType generate_single(RealType a, RealType b) {
|
|
505
|
-
RealType res;
|
|
506
|
-
std::uint32_t res_uint;
|
|
507
|
-
RealType a1;
|
|
508
|
-
RealType c1;
|
|
509
|
-
|
|
510
|
-
c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
|
|
511
|
-
a1 = (b + a) / static_cast<RealType>(2.0);
|
|
512
|
-
|
|
513
|
-
res_uint = philox4x32x10_impl::generate_single(this->state_);
|
|
514
|
-
|
|
515
|
-
res = static_cast<RealType>(static_cast<std::int32_t>(res_uint)) * c1 + a1;
|
|
516
|
-
|
|
517
|
-
return res;
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
__attribute__((always_inline)) inline std::uint32_t generate_single() {
|
|
521
|
-
return philox4x32x10_impl::generate_single(this->state_);
|
|
522
|
-
}
|
|
523
|
-
|
|
524
|
-
template <typename UIntType>
|
|
525
|
-
__attribute__((always_inline)) inline auto generate_single_uniform_bits() {
|
|
526
|
-
if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
|
|
527
|
-
return philox4x32x10_impl::generate_single(this->state_);
|
|
528
|
-
}
|
|
529
|
-
else {
|
|
530
|
-
auto uni_res1 = philox4x32x10_impl::generate_single(this->state_);
|
|
531
|
-
auto uni_res2 = philox4x32x10_impl::generate_single(this->state_);
|
|
532
|
-
|
|
533
|
-
return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
|
|
537
|
-
void skip_ahead(std::uint64_t num_to_skip) {
|
|
538
|
-
detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip);
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) {
|
|
542
|
-
detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip.size(),
|
|
543
|
-
num_to_skip.begin());
|
|
544
|
-
}
|
|
545
|
-
|
|
546
|
-
engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> state_;
|
|
547
|
-
};
|
|
548
|
-
|
|
549
|
-
} // namespace detail
|
|
550
|
-
} // namespace oneapi::mkl::rng::device
|
|
551
|
-
|
|
552
|
-
#endif // _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
|